• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 #include "./vpx_dsp_rtcd.h"
17 #include "./vpx_config.h"
18 
19 #include "vpx_dsp/vpx_dsp_common.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/vpx_timer.h"
22 #include "vpx_ports/system_state.h"
23 
24 #include "vp9/common/vp9_common.h"
25 #include "vp9/common/vp9_entropy.h"
26 #include "vp9/common/vp9_entropymode.h"
27 #include "vp9/common/vp9_idct.h"
28 #include "vp9/common/vp9_mvref_common.h"
29 #include "vp9/common/vp9_pred_common.h"
30 #include "vp9/common/vp9_quant_common.h"
31 #include "vp9/common/vp9_reconintra.h"
32 #include "vp9/common/vp9_reconinter.h"
33 #include "vp9/common/vp9_seg_common.h"
34 #include "vp9/common/vp9_tile_common.h"
35 
36 #include "vp9/encoder/vp9_aq_360.h"
37 #include "vp9/encoder/vp9_aq_complexity.h"
38 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
39 #include "vp9/encoder/vp9_aq_variance.h"
40 #include "vp9/encoder/vp9_encodeframe.h"
41 #include "vp9/encoder/vp9_encodemb.h"
42 #include "vp9/encoder/vp9_encodemv.h"
43 #include "vp9/encoder/vp9_ethread.h"
44 #include "vp9/encoder/vp9_extend.h"
45 #include "vp9/encoder/vp9_pickmode.h"
46 #include "vp9/encoder/vp9_rd.h"
47 #include "vp9/encoder/vp9_rdopt.h"
48 #include "vp9/encoder/vp9_segmentation.h"
49 #include "vp9/encoder/vp9_tokenize.h"
50 
// Forward declaration only; the definition is not part of this section of the
// file. Declared here so that code earlier in the file can call it.
51 static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
52                               int output_enabled, int mi_row, int mi_col,
53                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
54 
55 // Machine learning-based early termination parameters.
// Table of 24 doubles laid out as three rows of eight values; the last value
// of every row (indices 7, 15 and 23) is 0.0.
// NOTE(review): presumably the per-feature training means used to normalise
// the classifier inputs -- confirm at the use site, which is not visible here.
56 static const double train_mean[24] = {
57   303501.697372, 3042630.372158, 24.694696, 1.392182,
58   689.413511,    162.027012,     1.478213,  0.0,
59   135382.260230, 912738.513263,  28.845217, 1.515230,
60   544.158492,    131.807995,     1.436863,  0.0,
61   43682.377587,  208131.711766,  28.084737, 1.356677,
62   138.254122,    119.522553,     1.252322,  0.0
63 };
64 
// Table of 24 doubles with the same 3 x 8 layout as train_mean; indices 7, 15
// and 23 are 0.0 here as well.
// NOTE(review): presumably the matching per-feature spread (standard
// deviation) values -- confirm at the use site, which is not visible here.
65 static const double train_stdm[24] = {
66   673689.212982, 5996652.516628, 0.024449, 1.989792,
67   985.880847,    0.014638,       2.001898, 0.0,
68   208798.775332, 1812548.443284, 0.018693, 1.838009,
69   396.986910,    0.015657,       1.332541, 0.0,
70   55888.847031,  448587.962714,  0.017900, 1.904776,
71   98.652832,     0.016598,       1.320992, 0.0
72 };
73 
74 // Error tolerance: 0.01%-0.05%-0.1%
// Table of 24 doubles, the same length as train_mean and train_stdm.
// NOTE(review): presumably the weights of the early-termination classifier,
// one per normalised feature -- confirm at the use site (not visible here).
75 static const double classifiers[24] = {
76   0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
77   0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
78   0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
79   0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
80 };
81 
82 // This is used as a reference when computing the source variance for the
83 //  purpose of activity masking.
84 // Eventually this should be replaced by custom no-reference routines,
85 //  which will be faster.
// 64 entries, all equal to 128. vp9_get_sby_perpixel_variance() passes this
// table as the reference block with a stride of 0.
86 static const uint8_t VP9_VAR_OFFS[64] = {
87   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
92 };
93 
94 #if CONFIG_VP9_HIGHBITDEPTH
// 16-bit flat reference, 64 entries all equal to 128. Selected by
// vp9_high_get_sby_perpixel_variance() when bd is 8 or any value other than
// 10 or 12.
95 static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
96   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
97   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
98   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
99   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
101 };
102 
// 16-bit flat reference, 64 entries all equal to 128 * 4 (512). Selected by
// vp9_high_get_sby_perpixel_variance() when bd == 10.
103 static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
104   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
105   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
106   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
107   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
108   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
109   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
110   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
111   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
112 };
113 
// 16-bit flat reference, 64 entries all equal to 128 * 16 (2048). Selected by
// vp9_high_get_sby_perpixel_variance() when bd == 12.
114 static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
115   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
116   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
117   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
118   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
119   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
120   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
121   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
122   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
123   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
124   128 * 16
125 };
126 #endif  // CONFIG_VP9_HIGHBITDEPTH
127 
// Runs the variance function registered for block size `bs` on the block at
// `ref` against the flat VP9_VAR_OFFS table, then scales the result down by
// num_pels_log2_lookup[bs] bits with rounding.
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int unused_sse;
  unsigned int block_var;

  // The reference stride is 0, so every row compares against the same table.
  block_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0,
                                 &unused_sse);
  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
136 
137 #if CONFIG_VP9_HIGHBITDEPTH
// High bit depth counterpart of vp9_get_sby_perpixel_variance(): the flat
// reference table is picked according to `bd` (10, 12, otherwise the 8-bit
// table) and the rounding is done in 64-bit arithmetic.
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  const uint16_t *flat_ref;
  unsigned int block_var;
  unsigned int unused_sse;

  switch (bd) {
    case 10: flat_ref = VP9_HIGH_VAR_OFFS_10; break;
    case 12: flat_ref = VP9_HIGH_VAR_OFFS_12; break;
    case 8:
    default: flat_ref = VP9_HIGH_VAR_OFFS_8; break;
  }

  // The reference stride is 0, so every row compares against the same table.
  block_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                                 CONVERT_TO_BYTEPTR(flat_ref), 0, &unused_sse);
  return (unsigned int)ROUND64_POWER_OF_TWO((int64_t)block_var,
                                            num_pels_log2_lookup[bs]);
}
163 #endif  // CONFIG_VP9_HIGHBITDEPTH
164 
// Variance of the block at `ref` against the luma samples of the LAST_FRAME
// reference buffer at the same (mi_row, mi_col) position, scaled down by
// num_pels_log2_lookup[bs] bits with rounding.
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  const YV12_BUFFER_CONFIG *const last_frame =
      get_ref_frame_buffer(cpi, LAST_FRAME);
  unsigned int diff_var;
  unsigned int unused_sse;
  uint8_t *colocated;

  assert(last_frame != NULL);

  // mi units are converted to pixels with MI_SIZE.
  colocated = last_frame->y_buffer +
              (mi_row * MI_SIZE * last_frame->y_stride + mi_col * MI_SIZE);
  diff_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, colocated,
                                last_frame->y_stride, &unused_sse);
  return ROUND_POWER_OF_TWO(diff_var, num_pels_log2_lookup[bs]);
}
179 
get_rd_var_based_fixed_partition(VP9_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)180 static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
181                                                    int mi_row, int mi_col) {
182   unsigned int var = get_sby_perpixel_diff_variance(
183       cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
184   if (var < 8)
185     return BLOCK_64X64;
186   else if (var < 128)
187     return BLOCK_32X32;
188   else if (var < 2048)
189     return BLOCK_16X16;
190   else
191     return BLOCK_8X8;
192 }
193 
194 // Lighter version of set_offsets that only sets the mode info
195 // pointers.
set_mode_info_offsets(VP9_COMMON * const cm,MACROBLOCK * const x,MACROBLOCKD * const xd,int mi_row,int mi_col)196 static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
197                                          MACROBLOCK *const x,
198                                          MACROBLOCKD *const xd, int mi_row,
199                                          int mi_col) {
200   const int idx_str = xd->mi_stride * mi_row + mi_col;
201   xd->mi = cm->mi_grid_visible + idx_str;
202   xd->mi[0] = cm->mi + idx_str;
203   x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
204 }
205 
// Prepares `x` (and its embedded MACROBLOCKD) for coding the block of size
// `bsize` at (mi_row, mi_col): context and mode info pointers, destination and
// source planes, motion vector limits, RD constants, segment id / encode
// breakout, and the tile copy.
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct segmentation *const seg = &cm->seg;
  MvLimits *const limits = &x->mv_limits;
  const int bw_mi = num_8x8_blocks_wide_lookup[bsize];
  const int bh_mi = num_8x8_blocks_high_lookup[bsize];
  MODE_INFO *cur_mi;

  set_skip_context(xd, mi_row, mi_col);
  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
  cur_mi = xd->mi[0];

  // Destination (reconstruction) planes.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Limits for MV components. An MV beyond this range does not produce a
  // new/different prediction block.
  limits->row_min = -(((mi_row + bh_mi) * MI_SIZE) + VP9_INTERP_EXTEND);
  limits->col_min = -(((mi_col + bw_mi) * MI_SIZE) + VP9_INTERP_EXTEND);
  limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Distance of the block to the frame edges in 1/8th pel units. The block
  // position must be aligned to the block size.
  assert(!(mi_col & (bw_mi - 1)) && !(mi_row & (bh_mi - 1)));
  set_mi_row_col(xd, tile, mi_row, bh_mi, mi_col, bw_mi, cm->mi_rows,
                 cm->mi_cols);

  // Source planes.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D constants.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;

  // Segment id and encode breakout.
  if (!seg->enabled) {
    cur_mi->segment_id = 0;
    x->encode_breakout = cpi->encode_breakout;
  } else {
    // For these AQ modes the segment id is not reloaded from the map here.
    const int skip_map_lookup = cpi->oxcf.aq_mode == VARIANCE_AQ ||
                                cpi->oxcf.aq_mode == LOOKAHEAD_AQ ||
                                cpi->oxcf.aq_mode == EQUATOR360_AQ;
    if (!skip_map_lookup) {
      const uint8_t *const seg_map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      cur_mi->segment_id = get_segment_id(cm, seg_map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);

    x->encode_breakout = cpi->segment_encode_breakout[cur_mi->segment_id];
  }

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}
264 
// Points every mode info grid entry covered by the block (clipped to the
// frame dimensions) at the block's own MODE_INFO, xd->mi[0].
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int cols =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int rows =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int stride = xd->mi_stride;
  MODE_INFO *const shared_mi = xd->mi[0];
  int r, c;

  for (r = 0; r < rows; ++r) {
    MODE_INFO **const grid_row = xd->mi + r * stride;
    for (c = 0; c < cols; ++c) grid_row[c] = shared_mi;
  }
}
279 
// Records `bsize` as the sb_type of the mode info at (mi_row, mi_col).
// Positions outside the frame are ignored.
static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;

  if (mi_col >= cm->mi_cols || mi_row >= cm->mi_rows) return;

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
  xd->mi[0]->sb_type = bsize;
}
288 
// Accumulated statistics for one node of the variance tree.
// sum_square_error, sum_error and log2_count are written by fill_variance();
// variance is derived from them by get_variance().
289 typedef struct {
290   int64_t sum_square_error;
291   int64_t sum_error;
292   int log2_count;  // log2 of the number of samples accumulated
293   int variance;
294 } var;
295 
// Statistics of one tree node for each candidate partitioning, as filled in
// by fill_variance_tree():
//   horz[0] = split 0 + split 1, horz[1] = split 2 + split 3,
//   vert[0] = split 0 + split 2, vert[1] = split 1 + split 3,
//   none    = vert[0] + vert[1] (the whole node).
296 typedef struct {
297   var none;
298   var horz[2];
299   var vert[2];
300 } partition_variance;
301 
// Lowest level of the variance tree: its four children are plain `var`
// records (tree_to_node() links them directly for BLOCK_4X4).
302 typedef struct {
303   partition_variance part_variances;
304   var split[4];
305 } v4x4;
306 
// Variance tree node whose four children are v4x4 nodes.
307 typedef struct {
308   partition_variance part_variances;
309   v4x4 split[4];
310 } v8x8;
311 
// Variance tree node whose four children are v8x8 nodes.
312 typedef struct {
313   partition_variance part_variances;
314   v8x8 split[4];
315 } v16x16;
316 
// Variance tree node whose four children are v16x16 nodes.
317 typedef struct {
318   partition_variance part_variances;
319   v16x16 split[4];
320 } v32x32;
321 
// Root of the variance tree: its four children are v32x32 nodes.
322 typedef struct {
323   partition_variance part_variances;
324   v32x32 split[4];
325 } v64x64;
326 
// Size-independent view of one tree node, produced by tree_to_node():
// pointers to the node's own partition statistics and to the `var` record of
// each of its four children.
327 typedef struct {
328   partition_variance *part_variances;
329   var *split[4];
330 } variance_node;
331 
// Names for three levels of the variance tree.
// NOTE(review): not referenced in the visible part of the file -- confirm
// whether it is still used.
332 typedef enum {
333   V16X16,
334   V32X32,
335   V64X64,
336 } TREE_LEVEL;
337 
// Interprets `data` as the variance tree type matching `bsize` and fills
// `node` with pointers to that tree node's partition statistics and to the
// `var` record of each of its four children. Any other block size leaves
// node->part_variances NULL and trips an assert.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int child;

  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *const tree = (v64x64 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child)
        node->split[child] = &tree->split[child].part_variances.none;
      return;
    }
    case BLOCK_32X32: {
      v32x32 *const tree = (v32x32 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child)
        node->split[child] = &tree->split[child].part_variances.none;
      return;
    }
    case BLOCK_16X16: {
      v16x16 *const tree = (v16x16 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child)
        node->split[child] = &tree->split[child].part_variances.none;
      return;
    }
    case BLOCK_8X8: {
      v8x8 *const tree = (v8x8 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child)
        node->split[child] = &tree->split[child].part_variances.none;
      return;
    }
    case BLOCK_4X4: {
      // Children of a v4x4 are plain `var` records.
      v4x4 *const tree = (v4x4 *)data;
      node->part_variances = &tree->part_variances;
      for (child = 0; child < 4; ++child)
        node->split[child] = &tree->split[child];
      return;
    }
    default: break;
  }
  // No tree type corresponds to this block size.
  assert(0);
}
382 
383 // Set variance values given sum square error, sum error, count.
fill_variance(int64_t s2,int64_t s,int c,var * v)384 static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
385   v->sum_square_error = s2;
386   v->sum_error = s;
387   v->log2_count = c;
388 }
389 
// Derives v->variance from the accumulated sums:
//   256 * (sum_square_error - sum_error^2 / count) / count,
// where the divisions by count are right shifts by log2_count.
static void get_variance(var *v) {
  const int shift = v->log2_count;
  const int64_t squared_sum_term = (v->sum_error * v->sum_error) >> shift;
  const int64_t scaled = 256 * (v->sum_square_error - squared_sum_term);

  v->variance = (int)(scaled >> shift);
}
396 
// Combines two equally sized accumulators `a` and `b` into `r`: the sums are
// added and the log2 sample count goes up by one.
static void sum_2_variances(const var *a, const var *b, var *r) {
  const int64_t total_sq = a->sum_square_error + b->sum_square_error;
  const int64_t total = a->sum_error + b->sum_error;

  assert(a->log2_count == b->log2_count);
  fill_variance(total_sq, total, a->log2_count + 1, r);
}
402 
// Fills the horz, vert and none accumulators of the tree node `data` (of the
// type matching `bsize`) from the accumulators of its four children.
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  partition_variance *pv;

  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  pv = node.part_variances;

  // Children 0,1 and 2,3 form the two horizontal halves.
  sum_2_variances(node.split[0], node.split[1], &pv->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &pv->horz[1]);
  // Children 0,2 and 1,3 form the two vertical halves.
  sum_2_variances(node.split[0], node.split[2], &pv->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &pv->vert[1]);
  // The whole node is the sum of the two vertical halves.
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}
414 
// Decides, from the variance tree node `data` of size `bsize`, whether the
// block at (mi_row, mi_col) can be coded without a four-way split.
// Returns 1 when a block size was chosen (bsize itself, or two vertical or
// horizontal halves) and recorded with set_block_size(); returns 0 otherwise.
// `force_split == 1` always yields 0, as does any bsize below bsize_min.
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key = (cm->frame_type == KEY_FRAME);
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];
  const int half_w = block_width / 2;
  const int half_h = block_height / 2;
  variance_node vt;
  partition_variance *pv;
  int cols_fit, rows_fit;

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);
  pv = vt.part_variances;

  if (force_split == 1) return 0;
  if (bsize < bsize_min) return 0;

  // The block midpoint must lie inside the frame in the given direction.
  cols_fit = mi_col + half_w < cm->mi_cols;
  rows_fit = mi_row + half_h < cm->mi_rows;

  // On non-key frames the variance was already computed to set force_split.
  if (is_key) get_variance(&pv->none);

  // For key frame: take split for bsize above 32X32 or very high variance.
  // This does not apply at bsize_min.
  if (is_key && bsize > bsize_min &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4))) {
    return 0;
  }

  // If variance is low, take the bsize (no split).
  if (cols_fit && rows_fit && pv->none.variance < threshold) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    return 1;
  }

  // For bsize == bsize_min (16x16/8x8 for 8x8/4x4 downsampling) there is no
  // check for vert/horiz split, as there are too few samples for variance.
  if (bsize == bsize_min) return 0;

  // Check vertical split.
  if (rows_fit) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row, mi_col + half_w, subsize);
      return 1;
    }
  }

  // Check horizontal split.
  if (cols_fit) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row + half_h, mi_col, subsize);
      return 1;
    }
  }

  return 0;
}
491 
scale_part_thresh_sumdiff(int64_t threshold_base,int speed,int width,int height,int content_state)492 int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, int width,
493                                   int height, int content_state) {
494   if (speed >= 8) {
495     if (width <= 640 && height <= 480)
496       return (5 * threshold_base) >> 2;
497     else if ((content_state == kLowSadLowSumdiff) ||
498              (content_state == kHighSadLowSumdiff) ||
499              (content_state == kLowVarHighSumdiff))
500       return (5 * threshold_base) >> 2;
501   } else if (speed == 7) {
502     if ((content_state == kLowSadLowSumdiff) ||
503         (content_state == kHighSadLowSumdiff) ||
504         (content_state == kLowVarHighSumdiff)) {
505       return (5 * threshold_base) >> 2;
506     }
507   }
508   return threshold_base;
509 }
510 
511 // Set the variance split thresholds for following the block sizes:
512 // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
513 // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
514 // currently only used on key frame.
set_vbp_thresholds(VP9_COMP * cpi,int64_t thresholds[],int q,int content_state)515 static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
516                                int content_state) {
517   VP9_COMMON *const cm = &cpi->common;
518   const int is_key_frame = (cm->frame_type == KEY_FRAME);
519   const int threshold_multiplier = is_key_frame ? 20 : 1;
520   int64_t threshold_base =
521       (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
522 
523   if (is_key_frame) {
524     thresholds[0] = threshold_base;
525     thresholds[1] = threshold_base >> 2;
526     thresholds[2] = threshold_base >> 2;
527     thresholds[3] = threshold_base << 2;
528   } else {
529     // Increase base variance threshold based on estimated noise level.
530     if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
531       NOISE_LEVEL noise_level =
532           vp9_noise_estimate_extract_level(&cpi->noise_estimate);
533       if (noise_level == kHigh)
534         threshold_base = 3 * threshold_base;
535       else if (noise_level == kMedium)
536         threshold_base = threshold_base << 1;
537       else if (noise_level < kLow)
538         threshold_base = (7 * threshold_base) >> 3;
539     }
540 #if CONFIG_VP9_TEMPORAL_DENOISING
541     if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
542         cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
543       threshold_base =
544           vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
545                                 content_state, cpi->svc.temporal_layer_id);
546     else
547       threshold_base =
548           scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
549                                     cm->height, content_state);
550 #else
551     // Increase base variance threshold based on content_state/sum_diff level.
552     threshold_base = scale_part_thresh_sumdiff(
553         threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
554 #endif
555     thresholds[0] = threshold_base;
556     thresholds[2] = threshold_base << cpi->oxcf.speed;
557     if (cm->width <= 352 && cm->height <= 288) {
558       thresholds[0] = threshold_base >> 3;
559       thresholds[1] = threshold_base >> 1;
560       thresholds[2] = threshold_base << 3;
561     } else if (cm->width < 1280 && cm->height < 720) {
562       thresholds[1] = (5 * threshold_base) >> 2;
563     } else if (cm->width < 1920 && cm->height < 1080) {
564       thresholds[1] = threshold_base << 1;
565     } else {
566       thresholds[1] = (5 * threshold_base) >> 1;
567     }
568   }
569 }
570 
// Updates the variance-based partitioning thresholds stored in `cpi` for
// quantizer index `q`. Does nothing unless the partition search type is
// VAR_BASED_PARTITION or REFERENCE_PARTITION.
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const int key_frame = (cm->frame_type == KEY_FRAME);
  const int uses_vbp = sf->partition_search_type == VAR_BASED_PARTITION ||
                       sf->partition_search_type == REFERENCE_PARTITION;

  if (!uses_vbp) return;

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);

  // The thresholds below are not changed locally.
  if (key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_threshold_copy = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else if (cm->width <= 352 && cm->height <= 288) {
    cpi->vbp_threshold_sad = 10;
    cpi->vbp_bsize_min = BLOCK_16X16;
    cpi->vbp_threshold_copy = 4000;
  } else {
    // SAD threshold: twice the dequant value, but at least 1000.
    cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
                                 ? (cpi->y_dequant[q][1] << 1)
                                 : 1000;
    cpi->vbp_bsize_min = BLOCK_16X16;
    if (cm->width <= 640 && cm->height <= 360) {
      cpi->vbp_threshold_copy = 8000;
    } else {
      // Copy threshold: eight times the dequant value, but at least 8000.
      cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
                                    ? (cpi->y_dequant[q][1] << 3)
                                    : 8000;
    }
  }
  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}
606 
// Compute the minmax over the 8x8 subblocks of the 16x16 block at
// (x16_idx, y16_idx): for each subblock that starts inside the
// pixels_wide x pixels_high area, take (max - min) as reported by the minmax
// function, and return the largest such range minus the smallest.
// If no subblock qualifies, the initial values give 0 - 255.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  int sub;

  for (sub = 0; sub < 4; ++sub) {
    // Bit 0 of the index selects the x offset, bit 1 the y offset.
    const int x8 = x16_idx + ((sub & 1) << 3);
    const int y8 = y16_idx + ((sub >> 1) << 3);
    int lo = 0;
    int hi = 0;
    int range;

    if (x8 >= pixels_wide || y8 >= pixels_high) continue;

#if CONFIG_VP9_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo,
                            &hi);
    } else {
      vpx_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo, &hi);
    }
#else
    vpx_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo, &hi);
#endif

    range = hi - lo;
    if (range > largest_range) largest_range = range;
    if (range < smallest_range) smallest_range = range;
  }
  return largest_range - smallest_range;
}
642 
// Fills the four children of the 8x8 tree node `vst` from 4x4 averages.
// For each 4x4 subblock of the 8x8 block at (x8_idx, y8_idx) that starts
// inside the pixels_wide x pixels_high area, the error is the average of `s`
// minus the average of `d` (a constant 128 on key frames); subblocks outside
// get zero sums. Each child is stored as a single sample (log2 count 0).
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int sub;

  for (sub = 0; sub < 4; ++sub) {
    // Bit 0 of the index selects the x offset, bit 1 the y offset.
    const int x4 = x8_idx + ((sub & 1) << 2);
    const int y4 = y8_idx + ((sub >> 1) << 2);
    unsigned int sq_err = 0;
    int err = 0;

    if (x4 < pixels_wide && y4 < pixels_high) {
      int src_avg;
      int ref_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        src_avg = vpx_highbd_avg_4x4(s + y4 * sp + x4, sp);
        if (!is_key_frame) {
          ref_avg = vpx_highbd_avg_4x4(d + y4 * dp + x4, dp);
        }
      } else {
        src_avg = vpx_avg_4x4(s + y4 * sp + x4, sp);
        if (!is_key_frame) {
          ref_avg = vpx_avg_4x4(d + y4 * dp + x4, dp);
        }
      }
#else
      src_avg = vpx_avg_4x4(s + y4 * sp + x4, sp);
      if (!is_key_frame) {
        ref_avg = vpx_avg_4x4(d + y4 * dp + x4, dp);
      }
#endif
      err = src_avg - ref_avg;
      sq_err = err * err;
    }
    fill_variance(sq_err, err, 0, &vst->split[sub].part_variances.none);
  }
}
678 
// Fills the four children of the 16x16 tree node `vst` from 8x8 averages.
// For each 8x8 subblock of the 16x16 block at (x16_idx, y16_idx) that starts
// inside the pixels_wide x pixels_high area, the error is the average of `s`
// minus the average of `d` (a constant 128 on key frames); subblocks outside
// get zero sums. Each child is stored as a single sample (log2 count 0).
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int sub;

  for (sub = 0; sub < 4; ++sub) {
    // Bit 0 of the index selects the x offset, bit 1 the y offset.
    const int x8 = x16_idx + ((sub & 1) << 3);
    const int y8 = y16_idx + ((sub >> 1) << 3);
    unsigned int sq_err = 0;
    int err = 0;

    if (x8 < pixels_wide && y8 < pixels_high) {
      int src_avg;
      int ref_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        src_avg = vpx_highbd_avg_8x8(s + y8 * sp + x8, sp);
        if (!is_key_frame) {
          ref_avg = vpx_highbd_avg_8x8(d + y8 * dp + x8, dp);
        }
      } else {
        src_avg = vpx_avg_8x8(s + y8 * sp + x8, sp);
        if (!is_key_frame) {
          ref_avg = vpx_avg_8x8(d + y8 * dp + x8, dp);
        }
      }
#else
      src_avg = vpx_avg_8x8(s + y8 * sp + x8, sp);
      if (!is_key_frame) {
        ref_avg = vpx_avg_8x8(d + y8 * dp + x8, dp);
      }
#endif
      err = src_avg - ref_avg;
      sq_err = err * err;
    }
    fill_variance(sq_err, err, 0, &vst->split[sub].part_variances.none);
  }
}
714 
715 // Check if most of the superblock is skin content, and if so, force split to
716 // 32x32, and set x->sb_is_skin for use in mode selection.
skin_sb_split(VP9_COMP * cpi,MACROBLOCK * x,const int low_res,int mi_row,int mi_col,int * force_split)717 static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
718                          int mi_row, int mi_col, int *force_split) {
719   VP9_COMMON *const cm = &cpi->common;
720 #if CONFIG_VP9_HIGHBITDEPTH
721   if (cm->use_highbitdepth) return 0;
722 #endif
723   // Avoid checking superblocks on/near boundary and avoid low resolutions.
724   // Note superblock may still pick 64X64 if y_sad is very small
725   // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
726   if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
727                    mi_row + 8 < cm->mi_rows)) {
728     int num_16x16_skin = 0;
729     int num_16x16_nonskin = 0;
730     uint8_t *ysignal = x->plane[0].src.buf;
731     uint8_t *usignal = x->plane[1].src.buf;
732     uint8_t *vsignal = x->plane[2].src.buf;
733     int sp = x->plane[0].src.stride;
734     int spuv = x->plane[1].src.stride;
735     const int block_index = mi_row * cm->mi_cols + mi_col;
736     const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
737     const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
738     const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
739     const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
740     // Loop through the 16x16 sub-blocks.
741     int i, j;
742     for (i = 0; i < ymis; i += 2) {
743       for (j = 0; j < xmis; j += 2) {
744         int bl_index = block_index + i * cm->mi_cols + j;
745         int bl_index1 = bl_index + 1;
746         int bl_index2 = bl_index + cm->mi_cols;
747         int bl_index3 = bl_index2 + 1;
748         int consec_zeromv =
749             VPXMIN(cpi->consec_zero_mv[bl_index],
750                    VPXMIN(cpi->consec_zero_mv[bl_index1],
751                           VPXMIN(cpi->consec_zero_mv[bl_index2],
752                                  cpi->consec_zero_mv[bl_index3])));
753         int is_skin = vp9_compute_skin_block(
754             ysignal, usignal, vsignal, sp, spuv, BLOCK_16X16, consec_zeromv, 0);
755         num_16x16_skin += is_skin;
756         num_16x16_nonskin += (1 - is_skin);
757         if (num_16x16_nonskin > 3) {
758           // Exit loop if at least 4 of the 16x16 blocks are not skin.
759           i = ymis;
760           break;
761         }
762         ysignal += 16;
763         usignal += 8;
764         vsignal += 8;
765       }
766       ysignal += (sp << 4) - 64;
767       usignal += (spuv << 3) - 32;
768       vsignal += (spuv << 3) - 32;
769     }
770     if (num_16x16_skin > 12) {
771       *force_split = 1;
772       return 1;
773     }
774   }
775   return 0;
776 }
777 
// Sets entries of x->variance_low[] to 1 for blocks of the current superblock
// whose variance in the tree vt is below a fraction of the matching entry of
// thresholds[]. Entries are only ever set here, never cleared.
// Nothing is flagged unless ref_frame_partition is LAST_FRAME and either
// cpi->sf.short_circuit_low_temp_var == 1 or both components of
// xd->mi[0]->mv[0] lie strictly inside (-mv_thr, mv_thr), where mv_thr is 8
// for frame widths above 640 and 4 otherwise.
// Indices written into x->variance_low[]: [0] for 64x64, [1..2] for 64x32,
// [3..4] for 32x64, [5..8] for the 32x32 blocks, [9..24] for the 16x16 blocks.
// The variance threshold can be adjusted; the higher, the more aggressive.
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  v64x64 *vt, int64_t thresholds[],
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  // mi-unit (row, col) offsets of the four 32x32 quadrants in a superblock.
  static const int quad_mi_offset[4][2] = {
    { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 }
  };
  VP9_COMMON *const cm = &cpi->common;
  const int mv_thr = cm->width > 640 ? 8 : 4;
  const int sc_level = cpi->sf.short_circuit_low_temp_var;
  const MODE_INFO *sb_mi;
  int quad, sub;

  if (ref_frame_partition != LAST_FRAME) return;

  sb_mi = xd->mi[0];
  if (sc_level != 1) {
    // Require a small motion vector unless short-circuit level is exactly 1.
    const int mv_col = sb_mi->mv[0].as_mv.col;
    const int mv_row = sb_mi->mv[0].as_mv.row;
    if (mv_col >= mv_thr || mv_col <= -mv_thr) return;
    if (mv_row >= mv_thr || mv_row <= -mv_thr) return;
  }

  switch (sb_mi->sb_type) {
    case BLOCK_64X64:
      if (vt->part_variances.none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
      break;
    case BLOCK_64X32:
      for (quad = 0; quad < 2; quad++) {
        if (vt->part_variances.horz[quad].variance < (thresholds[0] >> 2))
          x->variance_low[quad + 1] = 1;
      }
      break;
    case BLOCK_32X64:
      for (quad = 0; quad < 2; quad++) {
        if (vt->part_variances.vert[quad].variance < (thresholds[0] >> 2))
          x->variance_low[quad + 3] = 1;
      }
      break;
    default:
      for (quad = 0; quad < 4; quad++) {
        const int row = mi_row + quad_mi_offset[quad][0];
        const int col = mi_col + quad_mi_offset[quad][1];
        BLOCK_SIZE quad_bsize;

        // Skip quadrants that start outside the frame.
        if (col >= cm->mi_cols || row >= cm->mi_rows) continue;

        quad_bsize = cm->mi_grid_visible[cm->mi_stride * row + col]->sb_type;
        if (quad_bsize == BLOCK_32X32) {
          int64_t threshold_32x32;
          if (sc_level == 1 || sc_level == 3)
            threshold_32x32 = (5 * thresholds[1]) >> 3;
          else
            threshold_32x32 = thresholds[1] >> 1;
          if (vt->split[quad].part_variances.none.variance < threshold_32x32)
            x->variance_low[quad + 5] = 1;
        } else if (sc_level >= 2 &&
                   (quad_bsize == BLOCK_16X16 || quad_bsize == BLOCK_32X16 ||
                    quad_bsize == BLOCK_16X32)) {
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          for (sub = 0; sub < 4; sub++) {
            if (vt->split[quad].split[sub].part_variances.none.variance <
                (thresholds[2] >> 8))
              x->variance_low[(quad << 2) + sub + 9] = 1;
          }
        }
      }
      break;
  }
}
843 
// Recursively re-applies the partitioning stored in cpi->prev_partition[] to
// the block of size bsize at (mi_row, mi_col), calling set_block_size() for
// every leaf. Blocks whose origin is outside the frame are ignored.
static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                     int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const BLOCK_SIZE *const saved = cpi->prev_partition;
  const int pos = mi_row * cm->mi_stride + mi_col;
  const int log2_width = b_width_log2_lookup[bsize];
  // Offset, in mi units, from the block origin to its second half.
  const int step = (1 << log2_width) / 4;
  PARTITION_TYPE part_type;
  BLOCK_SIZE sub_bsize;
  int q;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  part_type = partition_lookup[log2_width][saved[pos]];
  sub_bsize = get_subsize(bsize, part_type);

  if (sub_bsize < BLOCK_8X8 || part_type == PARTITION_NONE) {
    // Either no split, or the split would go below 8x8: keep bsize.
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
  } else if (part_type == PARTITION_HORZ) {
    set_block_size(cpi, x, xd, mi_row, mi_col, sub_bsize);
    set_block_size(cpi, x, xd, mi_row + step, mi_col, sub_bsize);
  } else if (part_type == PARTITION_VERT) {
    set_block_size(cpi, x, xd, mi_row, mi_col, sub_bsize);
    set_block_size(cpi, x, xd, mi_row, mi_col + step, sub_bsize);
  } else if (part_type == PARTITION_SPLIT) {
    // Visit order: (0, 0), (step, 0), (0, step), (step, step) as (row, col).
    for (q = 0; q < 4; ++q) {
      copy_partitioning_helper(cpi, x, xd, sub_bsize,
                               mi_row + (q & 1) * step,
                               mi_col + (q >> 1) * step);
    }
  } else {
    assert(0);
  }
}
886 
// Tries to reuse the stored partitioning (cpi->prev_partition) for the
// superblock at (mi_row, mi_col) instead of computing a new one.
// sb_offset indexes the per-superblock arrays prev_segment_id,
// copied_frame_cnt and prev_variance_low (25 entries per superblock).
// Returns 1 if the partitioning and x->variance_low were copied, in which
// case copied_frame_cnt[sb_offset] is incremented; returns 0 otherwise.
static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                             int mi_row, int mi_col, int segment_id,
                             int sb_offset) {
  int min_frames_since_key = 1;
  int copy_ok_for_svc = 1;

  if (cpi->use_svc) {
    // For SVC, don't allow copy if base spatial layer is key frame, or if
    // frame is not a temporal enhancement layer frame.
    const int num_tl = cpi->svc.number_temporal_layers;
    const int tl_id = cpi->svc.temporal_layer_id;
    const int layer = LAYER_IDS_TO_IDX(0, tl_id, num_tl);
    const LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
    const int below_top_temporal_layer = tl_id != num_tl - 1 && num_tl > 1;
    if (lc->is_key_frame || below_top_temporal_layer) copy_ok_for_svc = 0;
    min_frames_since_key = cpi->svc.number_spatial_layers << 1;
  }

  // All of the following must hold for the copy to be allowed; the checks are
  // kept in this order so the array reads happen only after the cheaper tests.
  if (cpi->rc.frames_since_key <= min_frames_since_key) return 0;
  if (!copy_ok_for_svc) return 0;
  if (cpi->resize_pending) return 0;
  if (segment_id != CR_SEGMENT_ID_BASE) return 0;
  if (cpi->prev_segment_id[sb_offset] != CR_SEGMENT_ID_BASE) return 0;
  if (cpi->copied_frame_cnt[sb_offset] >= cpi->max_copied_frame) return 0;
  if (cpi->prev_partition == NULL) return 0;

  copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
  cpi->copied_frame_cnt[sb_offset] += 1;
  memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
         sizeof(x->variance_low));
  return 1;
}
919 
// Recursively records into cpi->prev_partition[] the partitioning found in
// cm->mi_grid_visible for the block of size bsize at (mi_row, mi_col).
// Blocks whose origin is outside the frame are ignored, and the second half
// of a HORZ/VERT partition is only written when it starts inside the frame.
static void update_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                                  int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *const saved = cpi->prev_partition;
  const int pos = mi_row * cm->mi_stride + mi_col;
  const int log2_width = b_width_log2_lookup[bsize];
  // Offset, in mi units, from the block origin to its second half.
  const int step = (1 << log2_width) / 4;
  const MODE_INFO *cur_mi = NULL;
  PARTITION_TYPE part_type;
  BLOCK_SIZE sub_bsize;
  int q;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  cur_mi = cm->mi_grid_visible[pos];
  part_type = partition_lookup[log2_width][cur_mi->sb_type];
  sub_bsize = get_subsize(bsize, part_type);

  if (sub_bsize < BLOCK_8X8 || part_type == PARTITION_NONE) {
    // Either no split, or the split would go below 8x8: store bsize.
    saved[pos] = bsize;
  } else if (part_type == PARTITION_HORZ) {
    saved[pos] = sub_bsize;
    if (mi_row + step < cm->mi_rows)
      saved[pos + step * cm->mi_stride] = sub_bsize;
  } else if (part_type == PARTITION_VERT) {
    saved[pos] = sub_bsize;
    if (mi_col + step < cm->mi_cols) saved[pos + step] = sub_bsize;
  } else if (part_type == PARTITION_SPLIT) {
    // Visit order: (0, 0), (step, 0), (0, step), (step, step) as (row, col).
    for (q = 0; q < 4; ++q) {
      update_prev_partition(cpi, sub_bsize, mi_row + (q & 1) * step,
                            mi_col + (q >> 1) * step);
    }
  } else {
    assert(0);
  }
}
960 
// Sets x->color_sensitivity[0] and [1] (planes 1 and 2) by comparing each
// chroma plane's SAD between source and dst buffers against y_sad / 4.
// Does nothing on key frames. For speed >= 8 the check is also skipped when
// y_sad exceeds cpi->vbp_thresholds[1] and noise estimation is either
// disabled or reports a level below kMedium.
static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
                         unsigned int y_sad, int is_key_frame) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const unsigned int sad_limit = y_sad >> 2;
  int plane;

  if (is_key_frame) return;

  // For speed >= 8, avoid the chroma check if y_sad is above threshold.
  if (cpi->oxcf.speed >= 8 && y_sad > cpi->vbp_thresholds[1]) {
    if (!cpi->noise_estimate.enabled) return;
    if (vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)
      return;
  }

  for (plane = 1; plane <= 2; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    unsigned int uv_sad;

    // With no valid plane block size the SAD stays at UINT_MAX, which marks
    // the plane as color sensitive.
    if (plane_bsize == BLOCK_INVALID) {
      uv_sad = UINT_MAX;
    } else {
      uv_sad = cpi->fn_ptr[plane_bsize].sdf(p->src.buf, p->src.stride,
                                            pd->dst.buf, pd->dst.stride);
    }

    // TODO(marpan): Investigate if we should lower this threshold if
    // superblock is detected as skin.
    x->color_sensitivity[plane - 1] = uv_sad > sad_limit;
  }
}
991 
// Classifies the 64x64 block located `shift` bytes into the luma buffers of
// cpi->Source and cpi->Last_Source and stores the result in
// x->content_state_sb. Also maintains cpi->content_state_sb_fd[sb_offset]
// (when that array is allocated): it is incremented, capped at 255, while the
// SAD stays below 12000 and reset to 0 otherwise.
// Returns without touching any state for high bitdepth content.
static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
                           int sb_offset) {
  const BLOCK_SIZE bsize = BLOCK_64X64;
  const uint64_t low_sad_thresh = 10000;
  const uint64_t fd_sad_thresh = 12000;
  const int cur_stride = cpi->Source->y_stride;
  const int prev_stride = cpi->Last_Source->y_stride;
  uint8_t *cur_y = cpi->Source->y_buffer;
  uint8_t *prev_y = cpi->Last_Source->y_buffer;
  uint64_t sad;
  unsigned int sse;
  unsigned int variance;
  unsigned int sumdiff;
  int low_sumdiff;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) return;
#endif
  cur_y += shift;
  prev_y += shift;

  sad = cpi->fn_ptr[bsize].sdf(cur_y, cur_stride, prev_y, prev_stride);
  variance = vpx_variance64x64(cur_y, cur_stride, prev_y, prev_stride, &sse);
  // Note: sse - variance = ((sum * sum) >> 12)
  sumdiff = sse - variance;
  low_sumdiff = sumdiff < 25;

  if (sad < low_sad_thresh) {
    x->content_state_sb = low_sumdiff ? kLowSadLowSumdiff : kLowSadHighSumdiff;
  } else {
    x->content_state_sb =
        low_sumdiff ? kHighSadLowSumdiff : kHighSadHighSumdiff;
  }

  // Detect large lighting change.
  if (variance < (sse >> 3) && sumdiff > 10000)
    x->content_state_sb = kLowVarHighSumdiff;

  if (cpi->content_state_sb_fd == NULL) return;

  if (sad >= fd_sad_thresh) {
    cpi->content_state_sb_fd[sb_offset] = 0;
  } else if (cpi->content_state_sb_fd[sb_offset] < 255) {
    // Cap the increment to 255.
    cpi->content_state_sb_fd[sb_offset]++;
  }
}
1036 
1037 // This function chooses partitioning based on the variance between source and
1038 // reconstructed last, where variance is computed for down-sampled inputs.
choose_partitioning(VP9_COMP * cpi,const TileInfo * const tile,MACROBLOCK * x,int mi_row,int mi_col)1039 static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
1040                                MACROBLOCK *x, int mi_row, int mi_col) {
1041   VP9_COMMON *const cm = &cpi->common;
1042   MACROBLOCKD *xd = &x->e_mbd;
1043   int i, j, k, m;
1044   v64x64 vt;
1045   v16x16 vt2[16];
1046   int force_split[21];
1047   int avg_32x32;
1048   int max_var_32x32 = 0;
1049   int min_var_32x32 = INT_MAX;
1050   int var_32x32;
1051   int avg_16x16[4];
1052   int maxvar_16x16[4];
1053   int minvar_16x16[4];
1054   int64_t threshold_4x4avg;
1055   NOISE_LEVEL noise_level = kLow;
1056   int content_state = 0;
1057   uint8_t *s;
1058   const uint8_t *d;
1059   int sp;
1060   int dp;
1061   unsigned int y_sad = UINT_MAX;
1062   BLOCK_SIZE bsize = BLOCK_64X64;
1063   // Ref frame used in partitioning.
1064   MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
1065   int pixels_wide = 64, pixels_high = 64;
1066   int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
1067                             cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
1068 
1069   // For the variance computation under SVC mode, we treat the frame as key if
1070   // the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
1071   const int is_key_frame =
1072       (cm->frame_type == KEY_FRAME ||
1073        (is_one_pass_cbr_svc(cpi) &&
1074         cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
1075   // Always use 4x4 partition for key frame.
1076   const int use_4x4_partition = cm->frame_type == KEY_FRAME;
1077   const int low_res = (cm->width <= 352 && cm->height <= 288);
1078   int variance4x4downsample[16];
1079   int segment_id;
1080   int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);
1081 
1082   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
1083   segment_id = xd->mi[0]->segment_id;
1084 
1085   if (cpi->sf.use_source_sad && !is_key_frame) {
1086     int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
1087     content_state = x->content_state_sb;
1088     x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
1089                               content_state == kLowSadHighSumdiff)
1090                                  ? 1
1091                                  : 0;
1092     x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
1093     if (cpi->content_state_sb_fd != NULL)
1094       x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
1095     // If source_sad is low copy the partition without computing the y_sad.
1096     if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
1097         copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
1098       return 0;
1099     }
1100   }
1101 
1102   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
1103       cyclic_refresh_segment_id_boosted(segment_id)) {
1104     int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
1105     set_vbp_thresholds(cpi, thresholds, q, content_state);
1106   } else {
1107     set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
1108   }
1109 
1110   // For non keyframes, disable 4x4 average for low resolution when speed = 8
1111   threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;
1112 
1113   memset(x->variance_low, 0, sizeof(x->variance_low));
1114 
1115   if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
1116   if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
1117 
1118   s = x->plane[0].src.buf;
1119   sp = x->plane[0].src.stride;
1120 
1121   // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
1122   // 5-20 for the 16x16 blocks.
1123   force_split[0] = 0;
1124 
1125   if (!is_key_frame) {
1126     // In the case of spatial/temporal scalable coding, the assumption here is
1127     // that the temporal reference frame will always be of type LAST_FRAME.
1128     // TODO(marpan): If that assumption is broken, we need to revisit this code.
1129     MODE_INFO *mi = xd->mi[0];
1130     YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
1131 
1132     const YV12_BUFFER_CONFIG *yv12_g = NULL;
1133     unsigned int y_sad_g, y_sad_thr, y_sad_last;
1134     bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
1135             (mi_row + 4 < cm->mi_rows);
1136 
1137     assert(yv12 != NULL);
1138 
1139     if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
1140       // For now, GOLDEN will not be used for non-zero spatial layers, since
1141       // it may not be a temporal reference.
1142       yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
1143     }
1144 
1145     // Only compute y_sad_g (sad for golden reference) for speed < 8.
1146     if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
1147         (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
1148       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1149                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
1150       y_sad_g = cpi->fn_ptr[bsize].sdf(
1151           x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
1152           xd->plane[0].pre[0].stride);
1153     } else {
1154       y_sad_g = UINT_MAX;
1155     }
1156 
1157     if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
1158         cpi->rc.is_src_frame_alt_ref) {
1159       yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
1160       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
1161                            &cm->frame_refs[ALTREF_FRAME - 1].sf);
1162       mi->ref_frame[0] = ALTREF_FRAME;
1163       y_sad_g = UINT_MAX;
1164     } else {
1165       vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
1166                            &cm->frame_refs[LAST_FRAME - 1].sf);
1167       mi->ref_frame[0] = LAST_FRAME;
1168     }
1169     mi->ref_frame[1] = NONE;
1170     mi->sb_type = BLOCK_64X64;
1171     mi->mv[0].as_int = 0;
1172     mi->interp_filter = BILINEAR;
1173 
1174     if (cpi->oxcf.speed >= 8 && !low_res)
1175       y_sad = cpi->fn_ptr[bsize].sdf(
1176           x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
1177           xd->plane[0].pre[0].stride);
1178     else
1179       y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
1180 
1181     y_sad_last = y_sad;
1182     // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
1183     // are close if short_circuit_low_temp_var is on.
1184     y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
1185     if (y_sad_g < y_sad_thr) {
1186       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1187                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
1188       mi->ref_frame[0] = GOLDEN_FRAME;
1189       mi->mv[0].as_int = 0;
1190       y_sad = y_sad_g;
1191       ref_frame_partition = GOLDEN_FRAME;
1192     } else {
1193       x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
1194       ref_frame_partition = LAST_FRAME;
1195     }
1196 
1197     set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
1198     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
1199 
1200     x->sb_is_skin = skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
1201 
1202     d = xd->plane[0].dst.buf;
1203     dp = xd->plane[0].dst.stride;
1204 
1205     // If the y_sad is very small, take 64x64 as partition and exit.
1206     // Don't check on boosted segment for now, as 64x64 is suppressed there.
1207     if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
1208       const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
1209       const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
1210       if (mi_col + block_width / 2 < cm->mi_cols &&
1211           mi_row + block_height / 2 < cm->mi_rows) {
1212         set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
1213         x->variance_low[0] = 1;
1214         chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1215         return 0;
1216       }
1217     }
1218 
1219     // If the y_sad is small enough, copy the partition of the superblock in the
1220     // last frame to current frame only if the last frame is not a keyframe.
1221     // Stop the copy every cpi->max_copied_frame to refresh the partition.
1222     // TODO(jianj) : tune the threshold.
1223     if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
1224         copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
1225       chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1226       return 0;
1227     }
1228   } else {
1229     d = VP9_VAR_OFFS;
1230     dp = 0;
1231 #if CONFIG_VP9_HIGHBITDEPTH
1232     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1233       switch (xd->bd) {
1234         case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
1235         case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
1236         case 8:
1237         default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
1238       }
1239     }
1240 #endif  // CONFIG_VP9_HIGHBITDEPTH
1241   }
1242 
1243   // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
1244   // for splits.
1245   for (i = 0; i < 4; i++) {
1246     const int x32_idx = ((i & 1) << 5);
1247     const int y32_idx = ((i >> 1) << 5);
1248     const int i2 = i << 2;
1249     force_split[i + 1] = 0;
1250     avg_16x16[i] = 0;
1251     maxvar_16x16[i] = 0;
1252     minvar_16x16[i] = INT_MAX;
1253     for (j = 0; j < 4; j++) {
1254       const int x16_idx = x32_idx + ((j & 1) << 4);
1255       const int y16_idx = y32_idx + ((j >> 1) << 4);
1256       const int split_index = 5 + i2 + j;
1257       v16x16 *vst = &vt.split[i].split[j];
1258       force_split[split_index] = 0;
1259       variance4x4downsample[i2 + j] = 0;
1260       if (!is_key_frame) {
1261         fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
1262 #if CONFIG_VP9_HIGHBITDEPTH
1263                              xd->cur_buf->flags,
1264 #endif
1265                              pixels_wide, pixels_high, is_key_frame);
1266         fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
1267         get_variance(&vt.split[i].split[j].part_variances.none);
1268         avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
1269         if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
1270           minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
1271         if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
1272           maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
1273         if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
1274           // 16X16 variance is above threshold for split, so force split to 8x8
1275           // for this 16x16 block (this also forces splits for upper levels).
1276           force_split[split_index] = 1;
1277           force_split[i + 1] = 1;
1278           force_split[0] = 1;
1279         } else if (cpi->oxcf.speed < 8 &&
1280                    vt.split[i].split[j].part_variances.none.variance >
1281                        thresholds[1] &&
1282                    !cyclic_refresh_segment_id_boosted(segment_id)) {
1283           // We have some nominal amount of 16x16 variance (based on average),
1284           // compute the minmax over the 8x8 sub-blocks, and if above threshold,
1285           // force split to 8x8 block for this 16x16 block.
1286           int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
1287 #if CONFIG_VP9_HIGHBITDEPTH
1288                                           xd->cur_buf->flags,
1289 #endif
1290                                           pixels_wide, pixels_high);
1291           if (minmax > cpi->vbp_threshold_minmax) {
1292             force_split[split_index] = 1;
1293             force_split[i + 1] = 1;
1294             force_split[0] = 1;
1295           }
1296         }
1297       }
1298       if (is_key_frame || (low_res &&
1299                            vt.split[i].split[j].part_variances.none.variance >
1300                                threshold_4x4avg)) {
1301         force_split[split_index] = 0;
1302         // Go down to 4x4 down-sampling for variance.
1303         variance4x4downsample[i2 + j] = 1;
1304         for (k = 0; k < 4; k++) {
1305           int x8_idx = x16_idx + ((k & 1) << 3);
1306           int y8_idx = y16_idx + ((k >> 1) << 3);
1307           v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
1308           fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
1309 #if CONFIG_VP9_HIGHBITDEPTH
1310                                xd->cur_buf->flags,
1311 #endif
1312                                pixels_wide, pixels_high, is_key_frame);
1313         }
1314       }
1315     }
1316   }
1317   if (cpi->noise_estimate.enabled)
1318     noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
1319   // Fill the rest of the variance tree by summing split partition values.
1320   avg_32x32 = 0;
1321   for (i = 0; i < 4; i++) {
1322     const int i2 = i << 2;
1323     for (j = 0; j < 4; j++) {
1324       if (variance4x4downsample[i2 + j] == 1) {
1325         v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
1326         for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
1327         fill_variance_tree(vtemp, BLOCK_16X16);
1328         // If variance of this 16x16 block is above the threshold, force block
1329         // to split. This also forces a split on the upper levels.
1330         get_variance(&vtemp->part_variances.none);
1331         if (vtemp->part_variances.none.variance > thresholds[2]) {
1332           force_split[5 + i2 + j] = 1;
1333           force_split[i + 1] = 1;
1334           force_split[0] = 1;
1335         }
1336       }
1337     }
1338     fill_variance_tree(&vt.split[i], BLOCK_32X32);
1339     // If variance of this 32x32 block is above the threshold, or if its above
1340     // (some threshold of) the average variance over the sub-16x16 blocks, then
1341     // force this block to split. This also forces a split on the upper
1342     // (64x64) level.
1343     if (!force_split[i + 1]) {
1344       get_variance(&vt.split[i].part_variances.none);
1345       var_32x32 = vt.split[i].part_variances.none.variance;
1346       max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
1347       min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
1348       if (vt.split[i].part_variances.none.variance > thresholds[1] ||
1349           (!is_key_frame &&
1350            vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
1351            vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
1352         force_split[i + 1] = 1;
1353         force_split[0] = 1;
1354       } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
1355                  (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
1356                  maxvar_16x16[i] > thresholds[1]) {
1357         force_split[i + 1] = 1;
1358         force_split[0] = 1;
1359       }
1360       avg_32x32 += var_32x32;
1361     }
1362   }
1363   if (!force_split[0]) {
1364     fill_variance_tree(&vt, BLOCK_64X64);
1365     get_variance(&vt.part_variances.none);
1366     // If variance of this 64x64 block is above (some threshold of) the average
1367     // variance over the sub-32x32 blocks, then force this block to split.
1368     // Only checking this for noise level >= medium for now.
1369     if (!is_key_frame && noise_level >= kMedium &&
1370         vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
1371       force_split[0] = 1;
1372     // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
1373     // a 64x64 block is greater than threshold and the maximum 32x32 variance is
1374     // above a minimum threshold, then force the split of a 64x64 block.
1375     // Only check this for low noise.
1376     else if (!is_key_frame && noise_level < kMedium &&
1377              (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
1378              max_var_32x32 > thresholds[0] >> 1)
1379       force_split[0] = 1;
1380   }
1381 
1382   // Now go through the entire structure, splitting every block size until
1383   // we get to one that's got a variance lower than our threshold.
1384   if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
1385       !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
1386                            thresholds[0], BLOCK_16X16, force_split[0])) {
1387     for (i = 0; i < 4; ++i) {
1388       const int x32_idx = ((i & 1) << 2);
1389       const int y32_idx = ((i >> 1) << 2);
1390       const int i2 = i << 2;
1391       if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
1392                                (mi_row + y32_idx), (mi_col + x32_idx),
1393                                thresholds[1], BLOCK_16X16,
1394                                force_split[i + 1])) {
1395         for (j = 0; j < 4; ++j) {
1396           const int x16_idx = ((j & 1) << 1);
1397           const int y16_idx = ((j >> 1) << 1);
1398           // For inter frames: if variance4x4downsample[] == 1 for this 16x16
1399           // block, then the variance is based on 4x4 down-sampling, so use vt2
1400           // in set_vt_partitioning(), otherwise use vt.
1401           v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
1402                               ? &vt2[i2 + j]
1403                               : &vt.split[i].split[j];
1404           if (!set_vt_partitioning(
1405                   cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
1406                   mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
1407                   force_split[5 + i2 + j])) {
1408             for (k = 0; k < 4; ++k) {
1409               const int x8_idx = (k & 1);
1410               const int y8_idx = (k >> 1);
1411               if (use_4x4_partition) {
1412                 if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
1413                                          BLOCK_8X8,
1414                                          mi_row + y32_idx + y16_idx + y8_idx,
1415                                          mi_col + x32_idx + x16_idx + x8_idx,
1416                                          thresholds[3], BLOCK_8X8, 0)) {
1417                   set_block_size(
1418                       cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
1419                       (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
1420                 }
1421               } else {
1422                 set_block_size(
1423                     cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
1424                     (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
1425               }
1426             }
1427           }
1428         }
1429       }
1430     }
1431   }
1432 
1433   if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
1434     update_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
1435     cpi->prev_segment_id[sb_offset] = segment_id;
1436     memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
1437            sizeof(x->variance_low));
1438     // Reset the counter for copy partitioning
1439     if (cpi->copied_frame_cnt[sb_offset] == cpi->max_copied_frame)
1440       cpi->copied_frame_cnt[sb_offset] = 0;
1441   }
1442 
1443   if (cpi->sf.short_circuit_low_temp_var) {
1444     set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
1445                           mi_col, mi_row);
1446   }
1447 
1448   chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1449   return 0;
1450 }
1451 
// Commits the mode decision stored in |ctx| for the block at (mi_row, mi_col).
//
// The chosen MODE_INFO and mbmi_ext are copied into the live macroblock state,
// the per-plane coefficient pointers are switched to the context's buffers and
// every in-frame mode-info slot covered by the block is pointed at the block's
// MODE_INFO. When |output_enabled| is non-zero the function additionally
// accumulates frame-level counters and writes the block's reference frames and
// motion vectors into the current frame's MV buffer.
static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  // Mode info selected during the search; only read here.
  const MODE_INFO *const mi = &ctx->mic;
  // Destination mode info of the block. Captured once, before the mode-info
  // pointer grid is rewritten below.
  MODE_INFO *const xdmi = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  // Block extent clipped to the frame, in mode-info units.
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
  MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  int w, h;

  const int mis = cm->mi_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  int max_plane;

  assert(mi->sb_type == bsize);

  *xdmi = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  // If segmentation in use
  if (seg->enabled) {
    // For in frame complexity AQ copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      xdmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    // Else for cyclic refresh mode update the segment map, set the segment id
    // and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    }
  }

  // Planes below max_plane use buffer set 1 of the context, the remaining
  // planes use buffer set 2.
  max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];
  }

  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][2];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
    p[i].eobs = ctx->eobs_pbuf[i][2];
  }

  // Restore the coding context of the MB to that that was in place
  // when the mode was picked for it
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = xdmi;
      }

  if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);

  // For sub-8x8 inter blocks the block-level MVs are taken from sub-block 3.
  if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
    xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }

  x->skip = ctx->skip;
  memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
         sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);

  // Everything below only updates statistics / frame-level output.
  if (!output_enabled) return;

#if CONFIG_INTERNAL_STATS
  if (frame_is_intra_only(cm)) {
    static const int kf_mode_index[] = {
      THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
      THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
      THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
      THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
      THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
    };
    ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
  } else {
    // Note how often each mode chosen as best
    ++cpi->mode_chosen_counts[ctx->best_mode_index];
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(xdmi)) {
      vp9_update_mv_count(td);

      if (cm->interp_filter == SWITCHABLE) {
        // Named interp_ctx so it does not shadow the |ctx| parameter.
        const int interp_ctx = get_pred_context_switchable_interp(xd);
        ++td->counts->switchable_interp[interp_ctx][xdmi->interp_filter];
      }
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      rdc->filter_diff[i] += ctx->best_filter_diff[i];
  }

  // Store the chosen references and MVs for every in-frame position covered
  // by the block.
  for (h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->ref_frame[0];
      mv->ref_frame[1] = mi->ref_frame[1];
      mv->mv[0].as_int = mi->mv[0].as_int;
      mv->mv[1].as_int = mi->mv[1].as_int;
    }
  }
}
1580 
// Records |src| as the current buffer of |x|'s macroblock descriptor and calls
// setup_pred_plane() on the source buffer of every plane, using the Y/U/V
// buffers and strides of |src| and the position (mi_row, mi_col).
void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const plane_buf[3] = { src->y_buffer, src->u_buffer,
                                  src->v_buffer };
  // Both chroma planes share the same stride.
  const int plane_stride[3] = { src->y_stride, src->uv_stride,
                                src->uv_stride };
  int plane;

  // Set current frame pointer.
  xd->cur_buf = src;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&x->plane[plane].src, plane_buf[plane],
                     plane_stride[plane], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
1595 
// Fills the current block's mode info with a fixed "skip" configuration:
// ZEROMV from LAST_FRAME with a zero motion vector, no second reference,
// DC_PRED chroma mode and the skip flag set. Also sets x->skip and
// re-initialises |rd_cost| via vp9_rd_cost_init().
static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
                                   RD_COST *rd_cost, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  INTERP_FILTER filter_ref = get_pred_context_switchable_interp(xd);

  // Fall back to EIGHTTAP when the context value equals SWITCHABLE_FILTERS.
  if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;

  // Block geometry and transform size.
  mi->sb_type = bsize;
  mi->tx_size =
      VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);

  // Prediction: zero-motion single reference from LAST_FRAME.
  mi->mode = ZEROMV;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mv[0].as_int = 0;
  mi->bmi[0].as_mv[0].as_int = 0;
  mi->interp_filter = filter_ref;

  // No residual is coded.
  mi->skip = 1;
  x->skip = 1;

  vp9_rd_cost_init(rd_cost);
}
1621 
// Re-initialises the plane quantizers of |x| and returns the RD multiplier
// computed from the q index of |segment_id| offset by cm->y_dc_delta_q.
static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  VP9_COMMON *const cm = &cpi->common;
  int qindex;

  vp9_init_plane_quantizers(cpi, x);
  vpx_clear_system_state();

  qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  qindex += cm->y_dc_delta_q;
  return vp9_compute_rd_mult(cpi, qindex);
}
1631 
// Runs the rate-distortion mode search for one block of size |bsize| at
// (mi_row, mi_col) and returns the result in |rd_cost|.
//
// Before the search the block offsets, coefficient buffers (set 0 of |ctx|),
// source variance, transform-domain / coefficient-optimisation flags and, for
// the adaptive-quantization modes, the segment id and rdmult are set up.
// x->rdmult is restored to its entry value before returning, and the resulting
// rate and distortion are copied into |ctx|.
static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                             MACROBLOCK *const x, int mi_row, int mi_col,
                             RD_COST *rd_cost, BLOCK_SIZE bsize,
                             PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  MODE_INFO *mi;
  int plane;
  int saved_rdmult;

  vpx_clear_system_state();

  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // The search works on buffer set 0 of the pick-mode context.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    p[plane].coeff = ctx->coeff_pbuf[plane][0];
    p[plane].qcoeff = ctx->qcoeff_pbuf[plane][0];
    pd[plane].dqcoeff = ctx->dqcoeff_pbuf[plane][0];
    p[plane].eobs = ctx->eobs_pbuf[plane][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  mi->skip = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance = vp9_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  x->source_variance =
      vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Save rdmult before it might be changed, so it can be restored later.
  saved_rdmult = x->rdmult;

  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
    const double logvar = vp9_log_block_var(cpi, x, bsize);
    // Check block complexity as part of decision on using pixel or transform
    // domain distortion in rd tests.
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
                         (logvar >= cpi->sf.tx_domain_thresh);

    // Check block complexity as part of decision on using quantized
    // coefficient optimisation inside the rd loop.
    x->block_qcoeff_opt =
        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
  } else {
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
  }

  // Per-AQ-mode segment id / rdmult selection.
  switch (aq_mode) {
    case VARIANCE_AQ: {
      const int energy =
          bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);

      if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
          cpi->force_update_segmentation ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
        mi->segment_id = vp9_vaq_segment_id(energy);
      } else {
        const uint8_t *const map =
            cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
      break;
    }
    case LOOKAHEAD_AQ: {
      const uint8_t *const map = cpi->segmentation_map;

      // I do not change rdmult here consciously.
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    }
    case EQUATOR360_AQ: {
      if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) {
        mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
      } else {
        const uint8_t *const map =
            cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
      break;
    }
    case COMPLEXITY_AQ:
      x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
      break;
    case CYCLIC_REFRESH_AQ: {
      const uint8_t *const map =
          cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      // If segment is boosted, use rdmult for that segment.
      if (cyclic_refresh_segment_id_boosted(
              get_segment_id(cm, map, bsize, mi_row, mi_col)))
        x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
      break;
    }
    default: break;
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else if (bsize < BLOCK_8X8) {
    vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
  } else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
    vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx,
                                       best_rd);
  } else {
    vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                              bsize, ctx, best_rd);
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = saved_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
1774 
// Accumulates per-block symbol counts (intra/inter, reference selection and
// inter prediction mode) for the current block into td->counts. Does nothing
// on intra-only frames.
static void update_stats(VP9_COMMON *cm, ThreadData *td) {
  const MACROBLOCK *const x = &td->mb;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const BLOCK_SIZE bsize = mi->sb_type;
  FRAME_COUNTS *counts;
  int inter_block;
  int mode_ctx;

  if (frame_is_intra_only(cm)) return;

  counts = td->counts;
  inter_block = is_inter_block(mi);

  if (!segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
    ++counts->intra_inter[get_intra_inter_context(xd)][inter_block];
    // If the segment reference feature is enabled we have only a single
    // reference frame allowed for the segment so exclude it from
    // the reference frame counts used to work out probabilities.
    if (inter_block) {
      const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
      const int is_compound = has_second_ref(mi);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        ++counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
                            [is_compound];

      if (is_compound) {
        ++counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
                          [ref0 == GOLDEN_FRAME];
      } else {
        ++counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                            [ref0 != LAST_FRAME];
        if (ref0 != LAST_FRAME)
          ++counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 != GOLDEN_FRAME];
      }
    }
  }

  // Inter mode counts are only gathered for inter blocks whose segment does
  // not have the skip feature active.
  if (!inter_block ||
      segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
    return;

  mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
  if (bsize >= BLOCK_8X8) {
    ++counts->inter_mode[mode_ctx][INTER_OFFSET(mi->mode)];
  } else {
    // Sub-8x8: count the mode of each sub-block, stepping by the sub-block
    // dimensions in 4x4 units.
    const int step_w = num_4x4_blocks_wide_lookup[bsize];
    const int step_h = num_4x4_blocks_high_lookup[bsize];
    int blk_row, blk_col;
    for (blk_row = 0; blk_row < 2; blk_row += step_h) {
      for (blk_col = 0; blk_col < 2; blk_col += step_w) {
        const PREDICTION_MODE b_mode = mi->bmi[blk_row * 2 + blk_col].as_mode;
        ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
      }
    }
  }
}
1831 
// Copies previously saved entropy contexts (|a| above, |l| left) and partition
// contexts (|sa| above, |sl| left) back into the macroblock descriptor for the
// block of size |bsize| at (mi_row, mi_col). Counterpart of save_context().
static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                            BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int row_in_sb = mi_row & MI_MASK;
  int plane;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;

    // Each plane's slice of the saved buffer starts at plane * block size;
    // the copied length is reduced by the plane's subsampling shift.
    memcpy(xd->above_context[plane] + ((mi_col * 2) >> ss_x),
           a + num_4x4_blocks_wide * plane,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> ss_x);
    memcpy(xd->left_context[plane] + ((row_in_sb * 2) >> ss_y),
           l + num_4x4_blocks_high * plane,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> ss_y);
  }

  memcpy(xd->above_seg_context + mi_col, sa,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(xd->left_seg_context + row_in_sb, sl,
         sizeof(xd->left_seg_context[0]) * mi_height);
}
1859 
// Copies the entropy contexts (into |a| above, |l| left) and partition
// contexts (into |sa| above, |sl| left) of the block of size |bsize| at
// (mi_row, mi_col) out of the macroblock descriptor. Counterpart of
// restore_context().
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                         BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int row_in_sb = mi_row & MI_MASK;
  int plane;

  // buffer the above/left context information of the block in search.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;

    memcpy(a + num_4x4_blocks_wide * plane,
           xd->above_context[plane] + ((mi_col * 2) >> ss_x),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> ss_x);
    memcpy(l + num_4x4_blocks_high * plane,
           xd->left_context[plane] + ((row_in_sb * 2) >> ss_y),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> ss_y);
  }

  memcpy(sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(sl, xd->left_seg_context + row_in_sb,
         sizeof(xd->left_seg_context[0]) * mi_height);
}
1889 
// Encodes a single block: sets the block offsets, commits the mode decision
// held in |ctx| and calls encode_superblock(). When |output_enabled| is set,
// the block statistics are updated and an EOSB_TOKEN is written at *tp, which
// is then advanced by one entry.
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
                     TOKENEXTRA **tp, int mi_row, int mi_col,
                     int output_enabled, BLOCK_SIZE bsize,
                     PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;

  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);

  if (!output_enabled) return;

  update_stats(&cpi->common, td);

  (*tp)->token = EOSB_TOKEN;
  ++(*tp);
}
1906 
// Recursively encodes the partition tree |pc_tree| rooted at the block of size
// |bsize| at (mi_row, mi_col). Blocks whose origin lies outside the frame are
// ignored. Partition counts are updated only when |output_enabled| is set.
static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize];
  // Offset, in mode-info units, to the second half of the block.
  const int hbs = (1 << bsl) / 4;
  // Defaults apply to blocks smaller than 8x8.
  int ctx = 0;
  BLOCK_SIZE subsize = BLOCK_4X4;
  PARTITION_TYPE partition;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = get_subsize(bsize, pc_tree->partitioning);
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->none);
      break;

    case PARTITION_VERT:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->vertical[0]);
      // The right half is coded only if it starts inside the frame and the
      // parent is larger than 8x8.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
                 subsize, &pc_tree->vertical[1]);
      }
      break;

    case PARTITION_HORZ:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->horizontal[0]);
      // Same rule as above for the bottom half.
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
                 subsize, &pc_tree->horizontal[1]);
      }
      break;

    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        // An 8x8 split is coded as one block using the first leaf context.
        encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
                 pc_tree->leaf_split[0]);
      } else {
        // Recurse into the four quadrants in raster order.
        encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  pc_tree->split[0]);
        encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[1]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                  subsize, pc_tree->split[2]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[3]);
      }
      break;

    default: assert(0 && "Invalid partition type."); break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
1975 
1976 // Check to see if the given partition size is allowed for a specified number
1977 // of 8x8 block rows and columns remaining in the image.
1978 // If not then return the largest allowed partition size
find_partition_size(BLOCK_SIZE bsize,int rows_left,int cols_left,int * bh,int * bw)1979 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
1980                                       int cols_left, int *bh, int *bw) {
1981   if (rows_left <= 0 || cols_left <= 0) {
1982     return VPXMIN(bsize, BLOCK_8X8);
1983   } else {
1984     for (; bsize > 0; bsize -= 3) {
1985       *bh = num_8x8_blocks_high_lookup[bsize];
1986       *bw = num_8x8_blocks_wide_lookup[bsize];
1987       if ((*bh <= rows_left) && (*bw <= cols_left)) {
1988         break;
1989       }
1990     }
1991   }
1992   return bsize;
1993 }
1994 
// Assigns block sizes inside a 64x64 superblock that is only partly inside
// the image. Walks the superblock in steps of the current block height/width
// and, for each visited position, points mi_8x8[] at the matching entry of
// |mi| and sets its sb_type to the result of find_partition_size() for the
// rows/columns remaining at that position.
static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
                                         int bw_in, int row8x8_remaining,
                                         int col8x8_remaining, BLOCK_SIZE bsize,
                                         MODE_INFO **mi_8x8) {
  // The row step persists across rows; find_partition_size() may change it.
  int row_step = bh_in;
  int row = 0;

  while (row < MI_BLOCK_SIZE) {
    // The column step restarts from bw_in on every row.
    int col_step = bw_in;
    int col = 0;

    while (col < MI_BLOCK_SIZE) {
      const int index = row * mis + col;
      MODE_INFO *const entry = mi + index;

      mi_8x8[index] = entry;
      entry->sb_type =
          find_partition_size(bsize, row8x8_remaining - row,
                              col8x8_remaining - col, &row_step, &col_step);
      col += col_step;
    }
    row += row_step;
  }
}
2011 
2012 // This function attempts to set all mode info entries in a given SB64
2013 // to the same block partition size.
2014 // However, at the bottom and right borders of the image the requested size
2015 // may not be allowed in which case this code attempts to choose the largest
2016 // allowable partition.
set_fixed_partitioning(VP9_COMP * cpi,const TileInfo * const tile,MODE_INFO ** mi_8x8,int mi_row,int mi_col,BLOCK_SIZE bsize)2017 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
2018                                    MODE_INFO **mi_8x8, int mi_row, int mi_col,
2019                                    BLOCK_SIZE bsize) {
2020   VP9_COMMON *const cm = &cpi->common;
2021   const int mis = cm->mi_stride;
2022   const int row8x8_remaining = tile->mi_row_end - mi_row;
2023   const int col8x8_remaining = tile->mi_col_end - mi_col;
2024   int block_row, block_col;
2025   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
2026   int bh = num_8x8_blocks_high_lookup[bsize];
2027   int bw = num_8x8_blocks_wide_lookup[bsize];
2028 
2029   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
2030 
2031   // Apply the requested partition size to the SB64 if it is all "in image"
2032   if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
2033       (row8x8_remaining >= MI_BLOCK_SIZE)) {
2034     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
2035       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
2036         int index = block_row * mis + block_col;
2037         mi_8x8[index] = mi_upper_left + index;
2038         mi_8x8[index]->sb_type = bsize;
2039       }
2040     }
2041   } else {
2042     // Else this is a partial SB64.
2043     set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
2044                                  col8x8_remaining, bsize, mi_8x8);
2045   }
2046 }
2047 
// (row, col) offsets of the sixteen 16x16 blocks of a 64x64 superblock, in
// 8x8 units relative to the superblock origin. Entries are grouped four at a
// time by the 32x32 quadrant they belong to; within a group the order is
// top-left, top-right, bottom-left, bottom-right.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 },  // 32x32 index = 0
  { 0, 4 }, { 0, 6 }, { 2, 4 }, { 2, 6 },  // 32x32 index = 1
  { 4, 0 }, { 4, 2 }, { 6, 0 }, { 6, 2 },  // 32x32 index = 2
  { 4, 4 }, { 4, 6 }, { 6, 4 }, { 6, 6 },  // 32x32 index = 3
};
2073 
// Assigns a fixed partitioning to the SB64 at (mi_row, mi_col) using the
// per-16x16 statistics stored in cpi->source_diff_var.
//
// For a superblock that lies completely inside the tile, every 16x16 block is
// first marked BLOCK_16X16. A 32x32 quadrant is promoted to BLOCK_32X32 when
// all four of its 16x16 variances are below cpi->source_var_thresh, and the
// whole superblock is promoted to BLOCK_64X64 when all four quadrants were
// promoted and each merged 32x32 variance is below twice that threshold.
// A superblock that only partially overlaps the tile is handed to
// set_partial_b64x64_partition() with BLOCK_16X16.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x,
                                           MODE_INFO **mi_8x8, int mi_row,
                                           int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int rows_left = tile->mi_row_end - mi_row;
  const int cols_left = tile->mi_col_end - mi_col;
  MODE_INFO *const sb_origin = cm->mi + mi_row * mis + mi_col;
  diff d32[4];
  unsigned int thr;
  int sb_offset;
  int num_32x32 = 0;
  int quad, k;

  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  assert((rows_left > 0) && (cols_left > 0));

  // Partial in-image SB64.
  if (cols_left < MI_BLOCK_SIZE || rows_left < MI_BLOCK_SIZE) {
    const int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
    set_partial_b64x64_partition(sb_origin, mis, bh, bw, rows_left, cols_left,
                                 BLOCK_16X16, mi_8x8);
    return;
  }

  // In-image SB64.
  thr = cpi->source_var_thresh;
  // source_diff_var holds one entry per 16x16 block, mb_cols entries per row.
  sb_offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
  memset(d32, 0, sizeof(d32));

  for (quad = 0; quad < 4; ++quad) {
    diff *d16[4];
    int all_below_thr = 1;

    for (k = 0; k < 4; ++k) {
      const int blk_row = coord_lookup[quad * 4 + k].row;
      const int blk_col = coord_lookup[quad * 4 + k].col;
      const int stats_idx = blk_row / 2 * cm->mb_cols + blk_col / 2;
      const int grid_idx = blk_row * mis + blk_col;

      d16[k] = cpi->source_diff_var + sb_offset + stats_idx;

      mi_8x8[grid_idx] = sb_origin + grid_idx;
      mi_8x8[grid_idx]->sb_type = BLOCK_16X16;

      // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
      // size to further improve quality.
    }

    for (k = 0; k < 4; ++k) {
      if (d16[k]->var >= thr) {
        all_below_thr = 0;
        break;
      }
    }

    // Use 32x32 partition.
    if (all_below_thr) {
      int grid_idx;

      ++num_32x32;

      for (k = 0; k < 4; ++k) {
        d32[quad].sse += d16[k]->sse;
        d32[quad].sum += d16[k]->sum;
      }

      // var = sse - sum^2 / 1024 (the shift by 10 presumably corresponds to
      // the 32x32 pixel count).
      d32[quad].var =
          (unsigned int)(d32[quad].sse -
                         (unsigned int)(((int64_t)d32[quad].sum *
                                         d32[quad].sum) >>
                                        10));

      grid_idx = coord_lookup[quad * 4].row * mis + coord_lookup[quad * 4].col;
      mi_8x8[grid_idx] = sb_origin + grid_idx;
      mi_8x8[grid_idx]->sb_type = BLOCK_32X32;
    }
  }

  if (num_32x32 == 4) {
    thr <<= 1;

    // Use 64x64 partition.
    if ((d32[0].var < thr) && (d32[1].var < thr) && (d32[2].var < thr) &&
        (d32[3].var < thr)) {
      mi_8x8[0] = sb_origin;
      mi_8x8[0]->sb_type = BLOCK_64X64;
    }
  }
}
2161 
// Commits the mode decision stored in `ctx` for the block at (mi_row, mi_col):
// copies the mode info and its extension into the current block, refreshes the
// segment id and quantizers when segmentation-based AQ is active, updates the
// inter-mode counters, optionally records the block's motion vectors in
// cm->cur_frame->mvs, and finally takes over the skip flags from `ctx`.
static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
                            PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                            int bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  // The block dimensions come from the sb_type held in the mode info *before*
  // it is overwritten with ctx->mic below.
  const int blk_w = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int blk_h = num_8x8_blocks_high_lookup[mi->sb_type];
  // Clip the block to the part that lies inside the frame.
  const int cols_in_frame = VPXMIN(blk_w, cm->mi_cols - mi_col);
  const int rows_in_frame = VPXMIN(blk_h, cm->mi_rows - mi_row);
  int store_frame_mvs;

  *mi = ctx->mic;
  *(x->mbmi_ext) = ctx->mbmi_ext;

  if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      // Setting segmentation map for cyclic_refresh.
      vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip,
                                        x->plane);
    } else {
      // For in frame complexity AQ or variance AQ, copy segment_id from
      // segmentation_map.
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);
  }

  if (is_inter_block(mi)) {
    vp9_update_mv_count(td);
    if (cm->interp_filter == SWITCHABLE) {
      const int pred_ctx = get_pred_context_switchable_interp(xd);
      ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
    }

    // For sub-8x8 blocks the block-level mv is taken from the last
    // sub-block (bmi[3]).
    if (mi->sb_type < BLOCK_8X8) {
      mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
      mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
    }
  }

  store_frame_mvs =
      cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
      (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
       cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1);

  if (store_frame_mvs) {
    MV_REF *const block_mvs =
        cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
    int r, c;

    for (r = 0; r < rows_in_frame; ++r) {
      MV_REF *const row_mvs = block_mvs + r * cm->mi_cols;
      for (c = 0; c < cols_in_frame; ++c) {
        MV_REF *const dst = row_mvs + c;
        dst->ref_frame[0] = mi->ref_frame[0];
        dst->ref_frame[1] = mi->ref_frame[1];
        dst->mv[0].as_int = mi->mv[0].as_int;
        dst->mv[1].as_int = mi->mv[1].as_int;
      }
    }
  }

  x->skip = ctx->skip;
  // The transform-skip hint is only kept for segment 0.
  if (mi->segment_id) {
    x->skip_txfm[0] = 0;
  } else {
    x->skip_txfm[0] = ctx->skip_txfm[0];
  }
}
2229 
// Encodes one block of size `bsize` at (mi_row, mi_col) using the mode
// decision in `ctx`: sets up the block offsets, commits the decision, encodes
// the block, updates the statistics and terminates the block's token run with
// an EOSB_TOKEN, advancing *tp past it.
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
                        const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
                        int mi_col, int output_enabled, BLOCK_SIZE bsize,
                        PICK_MODE_CONTEXT *ctx) {
  TOKENEXTRA *end_token;

  set_offsets(cpi, tile, &td->mb, mi_row, mi_col, bsize);
  update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);

  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  update_stats(&cpi->common, td);

  // encode_superblock() may have advanced *tp, so read it only now.
  end_token = *tp;
  end_token->token = EOSB_TOKEN;
  *tp = end_token + 1;
}
2244 
// Recursively encodes the square block `bsize` at (mi_row, mi_col) following
// the partitioning recorded in cm->mi_grid_visible, using the mode contexts
// stored in `pc_tree`. Blocks starting outside the frame are ignored. When
// `output_enabled` is set the partition-type counter is updated as well.
static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
                         const TileInfo *const tile, TOKENEXTRA **tp,
                         int mi_row, int mi_col, int output_enabled,
                         BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &td->mb.e_mbd;
  const int bsl = b_width_log2_lookup[bsize];
  // Half of the block width/height, in mode-info units.
  const int half = (1 << bsl) / 4;
  int part_ctx = 0;
  BLOCK_SIZE subsize = BLOCK_4X4;
  PARTITION_TYPE partition;
  int i;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    const int grid_idx = xd->mi_stride * mi_row + mi_col;
    MODE_INFO **const mi_8x8 = cm->mi_grid_visible + grid_idx;
    part_ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = mi_8x8[0]->sb_type;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[part_ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->vertical[0]);
      // The right half exists only if it starts inside the frame.
      if (mi_col + half < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + half, output_enabled,
                    subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->horizontal[0]);
      // The bottom half exists only if it starts inside the frame.
      if (mi_row + half < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row + half, mi_col, output_enabled,
                    subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      // Children are visited in raster order: bit 0 of i selects the column,
      // bit 1 selects the row.
      for (i = 0; i < 4; ++i) {
        encode_sb_rt(cpi, td, tile, tp, mi_row + (i >> 1) * half,
                     mi_col + (i & 1) * half, output_enabled, subsize,
                     pc_tree->split[i]);
      }
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  // For a split of a block larger than 8x8 the children have already updated
  // the partition context.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
2312 
// Evaluates the partitioning already recorded in `mi_8x8` for the square block
// `bsize` at (mi_row, mi_col) and returns its rate and distortion through
// *rate and *dist.
//
// When cpi->sf.partition_search_type is SEARCH_PARTITION and
// cpi->sf.adjust_partitioning_from_last_frame is set, two alternatives are
// also costed: PARTITION_NONE (unless every sub-block of a split is itself
// split further) and a one-level PARTITION_SPLIT whose four children are
// coded without further partitioning. The cheapest of the three is recorded
// in pc_tree->partitioning.
//
// If `do_recon` is non-zero the chosen partitioning is encoded with
// encode_sb(); bitstream output is enabled only for BLOCK_64X64.
//
// If the block starts outside the frame the function returns immediately and
// leaves *rate and *dist untouched.
static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  BLOCK_SIZE bs_type;
  int do_partition_search = 1;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  // Only read the mode info once the block is known to start inside the
  // frame; the grid entry is not dereferenced for out-of-frame positions.
  bs_type = mi_8x8[0]->sb_type;

  assert(num_4x4_blocks_wide_lookup[bsize] ==
         num_4x4_blocks_high_lookup[bsize]);

  vp9_rd_cost_reset(&last_part_rdc);
  vp9_rd_cost_reset(&none_rdc);
  vp9_rd_cost_reset(&chosen_rdc);

  partition = partition_lookup[bsl][bs_type];
  subsize = get_subsize(bsize, partition);

  pc_tree->partitioning = partition;
  // Snapshot of the entropy/partition contexts on entry; restored before each
  // alternative is evaluated and again before returning.
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  if (do_partition_search &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
        if (this_mi && this_mi->sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If partition is not none try none unless each of the 4 splits are split
    // even further..
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + (mi_step >> 1) < cm->mi_rows &&
        mi_col + (mi_step >> 1) < cm->mi_cols) {
      pc_tree->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
                       INT64_MAX);

      pl = partition_plane_context(xd, mi_row, mi_col, bsize);

      if (none_rdc.rate < INT_MAX) {
        none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        none_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
      }

      // Undo the side effects of the trial so the recorded partitioning can
      // be evaluated from the same starting state.
      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      mi_8x8[0]->sb_type = bs_type;
      pc_tree->partitioning = partition;
    }
  }

  // Cost the partitioning that was passed in.
  switch (partition) {
    case PARTITION_NONE:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
                       ctx, INT64_MAX);
      break;
    case PARTITION_HORZ:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->horizontal[0], INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_row + (mi_step >> 1) < cm->mi_rows) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *first_ctx = &pc_tree->horizontal[0];
        vp9_rd_cost_init(&tmp_rdc);
        // Code the top half (without output) before searching the bottom
        // half.
        update_state(cpi, td, first_ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, first_ctx);
        rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
                         &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_VERT:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->vertical[0], INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_col + (mi_step >> 1) < cm->mi_cols) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *first_ctx = &pc_tree->vertical[0];
        vp9_rd_cost_init(&tmp_rdc);
        // Code the left half (without output) before searching the right
        // half.
        update_state(cpi, td, first_ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, first_ctx);
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
                         &tmp_rdc, subsize,
                         &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                         subsize, pc_tree->leaf_split[0], INT64_MAX);
        break;
      }
      last_part_rdc.rate = 0;
      last_part_rdc.dist = 0;
      last_part_rdc.rdcost = 0;
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * (mi_step >> 1);
        int y_idx = (i >> 1) * (mi_step >> 1);
        int jj = i >> 1, ii = i & 0x01;
        RD_COST tmp_rdc;
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        vp9_rd_cost_init(&tmp_rdc);
        // Every child except the last is reconstructed (i != 3).
        rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
                         tp, mi_row + y_idx, mi_col + x_idx, subsize,
                         &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
                         pc_tree->split[i]);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
      }
      break;
    default: assert(0); break;
  }

  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += cpi->partition_cost[pl][partition];
    last_part_rdc.rdcost =
        RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
  }

  // Try a one-level split when the recorded partitioning is not already a
  // split.
  if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
      (mi_row + mi_step < cm->mi_rows ||
       mi_row + (mi_step >> 1) == cm->mi_rows) &&
      (mi_col + mi_step < cm->mi_cols ||
       mi_col + (mi_step >> 1) == cm->mi_cols)) {
    BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
    chosen_rdc.rate = 0;
    chosen_rdc.dist = 0;
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    pc_tree->partitioning = PARTITION_SPLIT;

    // Split partition.
    for (i = 0; i < 4; i++) {
      int x_idx = (i & 1) * (mi_step >> 1);
      int y_idx = (i >> 1) * (mi_step >> 1);
      RD_COST tmp_rdc;
      // Per-iteration snapshot, kept separate from the function-level one
      // (a/l/sa/sl) so that the state on entry is still available afterwards.
      ENTROPY_CONTEXT sub_l[16 * MAX_MB_PLANE], sub_a[16 * MAX_MB_PLANE];
      PARTITION_CONTEXT sub_sl[8], sub_sa[8];

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      save_context(x, mi_row, mi_col, sub_a, sub_l, sub_sa, sub_sl, bsize);
      pc_tree->split[i]->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                       &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
                       INT64_MAX);

      restore_context(x, mi_row, mi_col, sub_a, sub_l, sub_sa, sub_sl, bsize);

      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
        vp9_rd_cost_reset(&chosen_rdc);
        break;
      }

      chosen_rdc.rate += tmp_rdc.rate;
      chosen_rdc.dist += tmp_rdc.dist;

      if (i != 3)
        encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
                  split_subsize, pc_tree->split[i]);

      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                   split_subsize);
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
    }
    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    if (chosen_rdc.rate < INT_MAX) {
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      chosen_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
    }
  }

  // If last_part is better set the partitioning to that.
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
    mi_8x8[0]->sb_type = bsize;
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
    chosen_rdc = last_part_rdc;
  }
  // If none was better set the partitioning to that.
  if (none_rdc.rdcost < chosen_rdc.rdcost) {
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
    chosen_rdc = none_rdc;
  }

  restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  assert(bsize != BLOCK_64X64 ||
         (chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX));

  if (do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  *rate = chosen_rdc.rate;
  *dist = chosen_rdc.dist;
}
2569 
2570 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
2571   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,  BLOCK_4X4, BLOCK_4X4,
2572   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,  BLOCK_8X8, BLOCK_16X16,
2573   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
2574 };
2575 
2576 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
2577   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
2578   BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
2579   BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
2580 };
2581 
2582 // Look at all the mode_info entries for blocks that are part of this
2583 // partition and find the min and max values for sb_type.
2584 // At the moment this is designed to work on a 64x64 SB but could be
2585 // adjusted to use a size parameter.
2586 //
2587 // The min and max are assumed to have been initialized prior to calling this
2588 // function so repeat calls can accumulate a min and max of more than one sb64.
get_sb_partition_size_range(MACROBLOCKD * xd,MODE_INFO ** mi_8x8,BLOCK_SIZE * min_block_size,BLOCK_SIZE * max_block_size,int bs_hist[BLOCK_SIZES])2589 static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
2590                                         BLOCK_SIZE *min_block_size,
2591                                         BLOCK_SIZE *max_block_size,
2592                                         int bs_hist[BLOCK_SIZES]) {
2593   int sb_width_in_blocks = MI_BLOCK_SIZE;
2594   int sb_height_in_blocks = MI_BLOCK_SIZE;
2595   int i, j;
2596   int index = 0;
2597 
2598   // Check the sb_type for each block that belongs to this region.
2599   for (i = 0; i < sb_height_in_blocks; ++i) {
2600     for (j = 0; j < sb_width_in_blocks; ++j) {
2601       MODE_INFO *mi = mi_8x8[index + j];
2602       BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
2603       bs_hist[sb_type]++;
2604       *min_block_size = VPXMIN(*min_block_size, sb_type);
2605       *max_block_size = VPXMAX(*max_block_size, sb_type);
2606     }
2607     index += xd->mi_stride;
2608   }
2609 }
2610 
2611 // Next square block size less or equal than current block size.
2612 static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
2613   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,
2614   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
2615   BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
2616 };
2617 
2618 // Look at neighboring blocks and set a min and max partition size based on
2619 // what they chose.
rd_auto_partition_range(VP9_COMP * cpi,const TileInfo * const tile,MACROBLOCKD * const xd,int mi_row,int mi_col,BLOCK_SIZE * min_block_size,BLOCK_SIZE * max_block_size)2620 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
2621                                     MACROBLOCKD *const xd, int mi_row,
2622                                     int mi_col, BLOCK_SIZE *min_block_size,
2623                                     BLOCK_SIZE *max_block_size) {
2624   VP9_COMMON *const cm = &cpi->common;
2625   MODE_INFO **mi = xd->mi;
2626   const int left_in_image = !!xd->left_mi;
2627   const int above_in_image = !!xd->above_mi;
2628   const int row8x8_remaining = tile->mi_row_end - mi_row;
2629   const int col8x8_remaining = tile->mi_col_end - mi_col;
2630   int bh, bw;
2631   BLOCK_SIZE min_size = BLOCK_4X4;
2632   BLOCK_SIZE max_size = BLOCK_64X64;
2633   int bs_hist[BLOCK_SIZES] = { 0 };
2634 
2635   // Trap case where we do not have a prediction.
2636   if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
2637     // Default "min to max" and "max to min"
2638     min_size = BLOCK_64X64;
2639     max_size = BLOCK_4X4;
2640 
2641     // NOTE: each call to get_sb_partition_size_range() uses the previous
2642     // passed in values for min and max as a starting point.
2643     // Find the min and max partition used in previous frame at this location
2644     if (cm->frame_type != KEY_FRAME) {
2645       MODE_INFO **prev_mi =
2646           &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
2647       get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
2648     }
2649     // Find the min and max partition sizes used in the left SB64
2650     if (left_in_image) {
2651       MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
2652       get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
2653                                   bs_hist);
2654     }
2655     // Find the min and max partition sizes used in the above SB64.
2656     if (above_in_image) {
2657       MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
2658       get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
2659                                   bs_hist);
2660     }
2661 
2662     // Adjust observed min and max for "relaxed" auto partition case.
2663     if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
2664       min_size = min_partition_size[min_size];
2665       max_size = max_partition_size[max_size];
2666     }
2667   }
2668 
2669   // Check border cases where max and min from neighbors may not be legal.
2670   max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
2671                                  &bh, &bw);
2672   // Test for blocks at the edge of the active image.
2673   // This may be the actual edge of the image or where there are formatting
2674   // bars.
2675   if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
2676     min_size = BLOCK_4X4;
2677   } else {
2678     min_size =
2679         VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
2680   }
2681 
2682   // When use_square_partition_only is true, make sure at least one square
2683   // partition is allowed by selecting the next smaller square size as
2684   // *min_block_size.
2685   if (cpi->sf.use_square_partition_only &&
2686       next_square_size[max_size] < min_size) {
2687     min_size = next_square_size[max_size];
2688   }
2689 
2690   *min_block_size = min_size;
2691   *max_block_size = max_size;
2692 }
2693 
2694 // TODO(jingning) refactor functions setting partition search range
set_partition_range(VP9_COMMON * cm,MACROBLOCKD * xd,int mi_row,int mi_col,BLOCK_SIZE bsize,BLOCK_SIZE * min_bs,BLOCK_SIZE * max_bs)2695 static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
2696                                 int mi_col, BLOCK_SIZE bsize,
2697                                 BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
2698   int mi_width = num_8x8_blocks_wide_lookup[bsize];
2699   int mi_height = num_8x8_blocks_high_lookup[bsize];
2700   int idx, idy;
2701 
2702   MODE_INFO *mi;
2703   const int idx_str = cm->mi_stride * mi_row + mi_col;
2704   MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
2705   BLOCK_SIZE bs, min_size, max_size;
2706 
2707   min_size = BLOCK_64X64;
2708   max_size = BLOCK_4X4;
2709 
2710   if (prev_mi) {
2711     for (idy = 0; idy < mi_height; ++idy) {
2712       for (idx = 0; idx < mi_width; ++idx) {
2713         mi = prev_mi[idy * cm->mi_stride + idx];
2714         bs = mi ? mi->sb_type : bsize;
2715         min_size = VPXMIN(min_size, bs);
2716         max_size = VPXMAX(max_size, bs);
2717       }
2718     }
2719   }
2720 
2721   if (xd->left_mi) {
2722     for (idy = 0; idy < mi_height; ++idy) {
2723       mi = xd->mi[idy * cm->mi_stride - 1];
2724       bs = mi ? mi->sb_type : bsize;
2725       min_size = VPXMIN(min_size, bs);
2726       max_size = VPXMAX(max_size, bs);
2727     }
2728   }
2729 
2730   if (xd->above_mi) {
2731     for (idx = 0; idx < mi_width; ++idx) {
2732       mi = xd->mi[idx - cm->mi_stride];
2733       bs = mi ? mi->sb_type : bsize;
2734       min_size = VPXMIN(min_size, bs);
2735       max_size = VPXMAX(max_size, bs);
2736     }
2737   }
2738 
2739   if (min_size == max_size) {
2740     min_size = min_partition_size[min_size];
2741     max_size = max_partition_size[max_size];
2742   }
2743 
2744   *min_bs = min_size;
2745   *max_bs = max_size;
2746 }
2747 
store_pred_mv(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx)2748 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2749   memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2750 }
2751 
load_pred_mv(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx)2752 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2753   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2754 }
2755 
2756 #if CONFIG_FP_MB_STATS
2757 const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
2758                                                         1, 2, 2, 2, 4, 4 };
2759 const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
2760                                                         2, 1, 2, 4, 2, 4 };
2761 const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
2762   0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120
2763 };
2764 const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
2765   0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120
2766 };
2767 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
2768   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
2769 };
2770 
// Coarse motion direction of a macroblock. The numbering is significant:
// opposite directions (left/right, up/down) differ by exactly 2.
typedef enum {
  MV_ZERO = 0,  // enumerators below count up from here
  MV_LEFT,      // 1
  MV_UP,        // 2
  MV_RIGHT,     // 3
  MV_DOWN,      // 4
  MV_INVALID    // 5
} MOTION_DIRECTION;
2779 
get_motion_direction_fp(uint8_t fp_byte)2780 static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
2781   if (fp_byte & FPMB_MOTION_ZERO_MASK) {
2782     return MV_ZERO;
2783   } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
2784     return MV_LEFT;
2785   } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
2786     return MV_RIGHT;
2787   } else if (fp_byte & FPMB_MOTION_UP_MASK) {
2788     return MV_UP;
2789   } else {
2790     return MV_DOWN;
2791   }
2792 }
2793 
get_motion_inconsistency(MOTION_DIRECTION this_mv,MOTION_DIRECTION that_mv)2794 static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
2795                                            MOTION_DIRECTION that_mv) {
2796   if (this_mv == that_mv) {
2797     return 0;
2798   } else {
2799     return abs(this_mv - that_mv) == 2 ? 2 : 1;
2800   }
2801 }
2802 #endif
2803 
2804 // Calculate the score used in machine-learning based partition search early
2805 // termination.
compute_score(VP9_COMMON * const cm,MACROBLOCKD * const xd,PICK_MODE_CONTEXT * ctx,int mi_row,int mi_col,BLOCK_SIZE bsize)2806 static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
2807                             PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
2808                             BLOCK_SIZE bsize) {
2809   const double *clf;
2810   const double *mean;
2811   const double *sd;
2812   const int mag_mv =
2813       abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
2814   const int left_in_image = !!xd->left_mi;
2815   const int above_in_image = !!xd->above_mi;
2816   MODE_INFO **prev_mi =
2817       &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
2818   int above_par = 0;  // above_partitioning
2819   int left_par = 0;   // left_partitioning
2820   int last_par = 0;   // last_partitioning
2821   BLOCK_SIZE context_size;
2822   double score;
2823   int offset = 0;
2824 
2825   assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
2826 
2827   if (above_in_image) {
2828     context_size = xd->above_mi->sb_type;
2829     if (context_size < bsize)
2830       above_par = 2;
2831     else if (context_size == bsize)
2832       above_par = 1;
2833   }
2834 
2835   if (left_in_image) {
2836     context_size = xd->left_mi->sb_type;
2837     if (context_size < bsize)
2838       left_par = 2;
2839     else if (context_size == bsize)
2840       left_par = 1;
2841   }
2842 
2843   if (prev_mi) {
2844     context_size = prev_mi[0]->sb_type;
2845     if (context_size < bsize)
2846       last_par = 2;
2847     else if (context_size == bsize)
2848       last_par = 1;
2849   }
2850 
2851   if (bsize == BLOCK_64X64)
2852     offset = 0;
2853   else if (bsize == BLOCK_32X32)
2854     offset = 8;
2855   else if (bsize == BLOCK_16X16)
2856     offset = 16;
2857 
2858   // early termination score calculation
2859   clf = &classifiers[offset];
2860   mean = &train_mean[offset];
2861   sd = &train_stdm[offset];
2862   score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
2863           clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
2864           clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
2865           clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) +
2866           clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
2867           clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
2868           clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
2869   return score;
2870 }
2871 
2872 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
2873 // unlikely to be selected depending on previous rate-distortion optimization
2874 // results, for encoding speed-up.
rd_pick_partition(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,TOKENEXTRA ** tp,int mi_row,int mi_col,BLOCK_SIZE bsize,RD_COST * rd_cost,int64_t best_rd,PC_TREE * pc_tree)2875 static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
2876                               TileDataEnc *tile_data, TOKENEXTRA **tp,
2877                               int mi_row, int mi_col, BLOCK_SIZE bsize,
2878                               RD_COST *rd_cost, int64_t best_rd,
2879                               PC_TREE *pc_tree) {
2880   VP9_COMMON *const cm = &cpi->common;
2881   TileInfo *const tile_info = &tile_data->tile_info;
2882   MACROBLOCK *const x = &td->mb;
2883   MACROBLOCKD *const xd = &x->e_mbd;
2884   const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
2885   ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2886   PARTITION_CONTEXT sl[8], sa[8];
2887   TOKENEXTRA *tp_orig = *tp;
2888   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
2889   int i;
2890   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2891   BLOCK_SIZE subsize;
2892   RD_COST this_rdc, sum_rdc, best_rdc;
2893   int do_split = bsize >= BLOCK_8X8;
2894   int do_rect = 1;
2895   INTERP_FILTER pred_interp_filter;
2896 
2897   // Override skipping rectangular partition operations for edge blocks
2898   const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
2899   const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
2900   const int xss = x->e_mbd.plane[1].subsampling_x;
2901   const int yss = x->e_mbd.plane[1].subsampling_y;
2902 
2903   BLOCK_SIZE min_size = x->min_partition_size;
2904   BLOCK_SIZE max_size = x->max_partition_size;
2905 
2906 #if CONFIG_FP_MB_STATS
2907   unsigned int src_diff_var = UINT_MAX;
2908   int none_complexity = 0;
2909 #endif
2910 
2911   int partition_none_allowed = !force_horz_split && !force_vert_split;
2912   int partition_horz_allowed =
2913       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
2914   int partition_vert_allowed =
2915       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
2916 
2917   int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
2918   int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
2919 
2920   (void)*tp_orig;
2921 
2922   assert(num_8x8_blocks_wide_lookup[bsize] ==
2923          num_8x8_blocks_high_lookup[bsize]);
2924 
2925   // Adjust dist breakout threshold according to the partition size.
2926   dist_breakout_thr >>=
2927       8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2928   rate_breakout_thr *= num_pels_log2_lookup[bsize];
2929 
2930   vp9_rd_cost_init(&this_rdc);
2931   vp9_rd_cost_init(&sum_rdc);
2932   vp9_rd_cost_reset(&best_rdc);
2933   best_rdc.rdcost = best_rd;
2934 
2935   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2936 
2937   if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
2938       cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
2939     x->mb_energy = vp9_block_energy(cpi, x, bsize);
2940 
2941   if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
2942     int cb_partition_search_ctrl =
2943         ((pc_tree->index == 0 || pc_tree->index == 3) +
2944          get_chessboard_index(cm->current_video_frame)) &
2945         0x1;
2946 
2947     if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
2948       set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
2949   }
2950 
2951   // Determine partition types in search according to the speed features.
2952   // The threshold set here has to be of square block size.
2953   if (cpi->sf.auto_min_max_partition_size) {
2954     partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
2955     partition_horz_allowed &=
2956         ((bsize <= max_size && bsize > min_size) || force_horz_split);
2957     partition_vert_allowed &=
2958         ((bsize <= max_size && bsize > min_size) || force_vert_split);
2959     do_split &= bsize > min_size;
2960   }
2961 
2962   if (cpi->sf.use_square_partition_only &&
2963       bsize > cpi->sf.use_square_only_threshold) {
2964     if (cpi->use_svc) {
2965       if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
2966         partition_horz_allowed &= force_horz_split;
2967       if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
2968         partition_vert_allowed &= force_vert_split;
2969     } else {
2970       partition_horz_allowed &= force_horz_split;
2971       partition_vert_allowed &= force_vert_split;
2972     }
2973   }
2974 
2975   save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2976 
2977 #if CONFIG_FP_MB_STATS
2978   if (cpi->use_fp_mb_stats) {
2979     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2980     src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
2981                                                   mi_col, bsize);
2982   }
2983 #endif
2984 
2985 #if CONFIG_FP_MB_STATS
2986   // Decide whether we shall split directly and skip searching NONE by using
2987   // the first pass block statistics
2988   if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
2989       partition_none_allowed && src_diff_var > 4 &&
2990       cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
2991     int mb_row = mi_row >> 1;
2992     int mb_col = mi_col >> 1;
2993     int mb_row_end =
2994         VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
2995     int mb_col_end =
2996         VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
2997     int r, c;
2998 
2999     // compute a complexity measure, basically measure inconsistency of motion
3000     // vectors obtained from the first pass in the current block
3001     for (r = mb_row; r < mb_row_end; r++) {
3002       for (c = mb_col; c < mb_col_end; c++) {
3003         const int mb_index = r * cm->mb_cols + c;
3004 
3005         MOTION_DIRECTION this_mv;
3006         MOTION_DIRECTION right_mv;
3007         MOTION_DIRECTION bottom_mv;
3008 
3009         this_mv =
3010             get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
3011 
3012         // to its right
3013         if (c != mb_col_end - 1) {
3014           right_mv = get_motion_direction_fp(
3015               cpi->twopass.this_frame_mb_stats[mb_index + 1]);
3016           none_complexity += get_motion_inconsistency(this_mv, right_mv);
3017         }
3018 
3019         // to its bottom
3020         if (r != mb_row_end - 1) {
3021           bottom_mv = get_motion_direction_fp(
3022               cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
3023           none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
3024         }
3025 
3026         // do not count its left and top neighbors to avoid double counting
3027       }
3028     }
3029 
3030     if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
3031       partition_none_allowed = 0;
3032     }
3033   }
3034 #endif
3035 
3036   // PARTITION_NONE
3037   if (partition_none_allowed) {
3038     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
3039                      best_rdc.rdcost);
3040     if (this_rdc.rate != INT_MAX) {
3041       if (bsize >= BLOCK_8X8) {
3042         this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
3043         this_rdc.rdcost =
3044             RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
3045       }
3046 
3047       if (this_rdc.rdcost < best_rdc.rdcost) {
3048         MODE_INFO *mi = xd->mi[0];
3049 
3050         best_rdc = this_rdc;
3051         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
3052 
3053         if (!cpi->sf.ml_partition_search_early_termination) {
3054           // If all y, u, v transform blocks in this partition are skippable,
3055           // and the dist & rate are within the thresholds, the partition search
3056           // is terminated for current branch of the partition search tree.
3057           if (!x->e_mbd.lossless && ctx->skippable &&
3058               ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
3059                (best_rdc.dist < dist_breakout_thr &&
3060                 best_rdc.rate < rate_breakout_thr))) {
3061             do_split = 0;
3062             do_rect = 0;
3063           }
3064         } else {
3065           // Currently, the machine-learning based partition search early
3066           // termination is only used while bsize is 16x16, 32x32 or 64x64,
3067           // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
3068           if (!x->e_mbd.lossless &&
3069               !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
3070               ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
3071             if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) {
3072               do_split = 0;
3073               do_rect = 0;
3074             }
3075           }
3076         }
3077 
3078 #if CONFIG_FP_MB_STATS
3079         // Check if every 16x16 first pass block statistics has zero
3080         // motion and the corresponding first pass residue is small enough.
3081         // If that is the case, check the difference variance between the
3082         // current frame and the last frame. If the variance is small enough,
3083         // stop further splitting in RD optimization
3084         if (cpi->use_fp_mb_stats && do_split != 0 &&
3085             cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
3086           int mb_row = mi_row >> 1;
3087           int mb_col = mi_col >> 1;
3088           int mb_row_end =
3089               VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
3090           int mb_col_end =
3091               VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
3092           int r, c;
3093 
3094           int skip = 1;
3095           for (r = mb_row; r < mb_row_end; r++) {
3096             for (c = mb_col; c < mb_col_end; c++) {
3097               const int mb_index = r * cm->mb_cols + c;
3098               if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
3099                     FPMB_MOTION_ZERO_MASK) ||
3100                   !(cpi->twopass.this_frame_mb_stats[mb_index] &
3101                     FPMB_ERROR_SMALL_MASK)) {
3102                 skip = 0;
3103                 break;
3104               }
3105             }
3106             if (skip == 0) {
3107               break;
3108             }
3109           }
3110 
3111           if (skip) {
3112             if (src_diff_var == UINT_MAX) {
3113               set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3114               src_diff_var = get_sby_perpixel_diff_variance(
3115                   cpi, &x->plane[0].src, mi_row, mi_col, bsize);
3116             }
3117             if (src_diff_var < 8) {
3118               do_split = 0;
3119               do_rect = 0;
3120             }
3121           }
3122         }
3123 #endif
3124       }
3125     }
3126     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3127   }
3128 
3129   // store estimated motion vector
3130   if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
3131 
3132   // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
3133   // intra block and used for context purposes.
3134   if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
3135     pred_interp_filter = EIGHTTAP;
3136   } else {
3137     pred_interp_filter = ctx->mic.interp_filter;
3138   }
3139 
3140   // PARTITION_SPLIT
3141   // TODO(jingning): use the motion vectors given by the above search as
3142   // the starting point of motion search in the following partition type check.
3143   if (do_split) {
3144     subsize = get_subsize(bsize, PARTITION_SPLIT);
3145     if (bsize == BLOCK_8X8) {
3146       i = 4;
3147       if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
3148         pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
3149       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3150                        pc_tree->leaf_split[0], best_rdc.rdcost);
3151 
3152       if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
3153     } else {
3154       for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
3155         const int x_idx = (i & 1) * mi_step;
3156         const int y_idx = (i >> 1) * mi_step;
3157 
3158         if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3159           continue;
3160 
3161         if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
3162 
3163         pc_tree->split[i]->index = i;
3164         rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3165                           mi_col + x_idx, subsize, &this_rdc,
3166                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
3167 
3168         if (this_rdc.rate == INT_MAX) {
3169           sum_rdc.rdcost = INT64_MAX;
3170           break;
3171         } else {
3172           sum_rdc.rate += this_rdc.rate;
3173           sum_rdc.dist += this_rdc.dist;
3174           sum_rdc.rdcost += this_rdc.rdcost;
3175         }
3176       }
3177     }
3178 
3179     if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
3180       sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
3181       sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3182 
3183       if (sum_rdc.rdcost < best_rdc.rdcost) {
3184         best_rdc = sum_rdc;
3185         pc_tree->partitioning = PARTITION_SPLIT;
3186 
3187         // Rate and distortion based partition search termination clause.
3188         if (!cpi->sf.ml_partition_search_early_termination &&
3189             !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
3190                                    (best_rdc.dist < dist_breakout_thr &&
3191                                     best_rdc.rate < rate_breakout_thr))) {
3192           do_rect = 0;
3193         }
3194       }
3195     } else {
3196       // skip rectangular partition test when larger block size
3197       // gives better rd cost
3198       if ((cpi->sf.less_rectangular_check) &&
3199           ((bsize > cpi->sf.use_square_only_threshold) ||
3200            (best_rdc.dist < dist_breakout_thr)))
3201         do_rect &= !partition_none_allowed;
3202     }
3203     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3204   }
3205 
3206   // PARTITION_HORZ
3207   if (partition_horz_allowed &&
3208       (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
3209     subsize = get_subsize(bsize, PARTITION_HORZ);
3210     if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
3211     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3212         partition_none_allowed)
3213       pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
3214     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3215                      &pc_tree->horizontal[0], best_rdc.rdcost);
3216 
3217     if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
3218         bsize > BLOCK_8X8) {
3219       PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
3220       update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
3221       encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
3222 
3223       if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
3224       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3225           partition_none_allowed)
3226         pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
3227       rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
3228                        subsize, &pc_tree->horizontal[1],
3229                        best_rdc.rdcost - sum_rdc.rdcost);
3230       if (this_rdc.rate == INT_MAX) {
3231         sum_rdc.rdcost = INT64_MAX;
3232       } else {
3233         sum_rdc.rate += this_rdc.rate;
3234         sum_rdc.dist += this_rdc.dist;
3235         sum_rdc.rdcost += this_rdc.rdcost;
3236       }
3237     }
3238 
3239     if (sum_rdc.rdcost < best_rdc.rdcost) {
3240       sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
3241       sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3242       if (sum_rdc.rdcost < best_rdc.rdcost) {
3243         best_rdc = sum_rdc;
3244         pc_tree->partitioning = PARTITION_HORZ;
3245 
3246         if ((cpi->sf.less_rectangular_check) &&
3247             (bsize > cpi->sf.use_square_only_threshold))
3248           do_rect = 0;
3249       }
3250     }
3251     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3252   }
3253 
3254   // PARTITION_VERT
3255   if (partition_vert_allowed &&
3256       (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
3257     subsize = get_subsize(bsize, PARTITION_VERT);
3258 
3259     if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
3260     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3261         partition_none_allowed)
3262       pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
3263     rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3264                      &pc_tree->vertical[0], best_rdc.rdcost);
3265     if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
3266         bsize > BLOCK_8X8) {
3267       update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
3268       encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
3269                         &pc_tree->vertical[0]);
3270 
3271       if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
3272       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3273           partition_none_allowed)
3274         pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
3275       rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
3276                        subsize, &pc_tree->vertical[1],
3277                        best_rdc.rdcost - sum_rdc.rdcost);
3278       if (this_rdc.rate == INT_MAX) {
3279         sum_rdc.rdcost = INT64_MAX;
3280       } else {
3281         sum_rdc.rate += this_rdc.rate;
3282         sum_rdc.dist += this_rdc.dist;
3283         sum_rdc.rdcost += this_rdc.rdcost;
3284       }
3285     }
3286 
3287     if (sum_rdc.rdcost < best_rdc.rdcost) {
3288       sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
3289       sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3290       if (sum_rdc.rdcost < best_rdc.rdcost) {
3291         best_rdc = sum_rdc;
3292         pc_tree->partitioning = PARTITION_VERT;
3293       }
3294     }
3295     restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3296   }
3297 
3298   // TODO(jbb): This code added so that we avoid static analysis
3299   // warning related to the fact that best_rd isn't used after this
3300   // point.  This code should be refactored so that the duplicate
3301   // checks occur in some sub function and thus are used...
3302   (void)best_rd;
3303   *rd_cost = best_rdc;
3304 
3305   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
3306       pc_tree->index != 3) {
3307     int output_enabled = (bsize == BLOCK_64X64);
3308     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
3309               pc_tree);
3310   }
3311 
3312   if (bsize == BLOCK_64X64) {
3313     assert(tp_orig < *tp);
3314     assert(best_rdc.rate < INT_MAX);
3315     assert(best_rdc.dist < INT64_MAX);
3316   } else {
3317     assert(tp_orig == *tp);
3318   }
3319 }
3320 
// Encodes every 64x64 superblock of the tile that lies on the superblock row
// starting at mi_row, choosing the partitioning strategy per superblock from
// the speed features and segmentation state. The row-multithreading sync
// hooks are invoked before and after each superblock.
static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, int mi_row,
                             TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int col_begin = tile_info->mi_col_start;
  const int col_end = tile_info->mi_col_end;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int sb_cols_in_tile =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col = 0;
  int mi_col;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = col_begin; mi_col < col_end;
       mi_col += MI_BLOCK_SIZE, ++sb_col) {
    const struct segmentation *const seg = &cm->seg;
    MODE_INFO **const mi =
        cm->mi_grid_visible + (cm->mi_stride * mi_row + mi_col);
    int unused_rate;
    int64_t unused_dist;
    RD_COST unused_rdc;
    int seg_skip = 0;
    int use_fixed;

    cpi->row_mt_sync_read_ptr(&tile_data->row_mt_sync, sb_row, sb_col);

    if (sf->adaptive_pred_interp_filter) {
      int n;
      // Reset the predicted interpolation filter of every leaf and of both
      // halves of every rectangular partition context.
      for (n = 0; n < 64; ++n) {
        td->leaf_tree[n].pred_interp_filter = SWITCHABLE;
      }
      for (n = 0; n < 64; ++n) {
        PC_TREE *const node = &td->pc_tree[n];
        node->vertical[0].pred_interp_filter = SWITCHABLE;
        node->vertical[1].pred_interp_filter = SWITCHABLE;
        node->horizontal[0].pred_interp_filter = SWITCHABLE;
        node->horizontal[1].pred_interp_filter = SWITCHABLE;
      }
    }

    vp9_zero(x->pred_mv);
    td->pc_root->index = 0;

    if (seg->enabled) {
      const uint8_t *map = cm->last_frame_seg_map;
      int segment_id;
      if (seg->update_map) map = cpi->segmentation_map;
      segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    x->source_variance = UINT_MAX;
    use_fixed = (sf->partition_search_type == FIXED_PARTITION || seg_skip);

    if (use_fixed || cpi->partition_search_skippable_frame) {
      // Both paths impose one block size on the whole superblock; they only
      // differ in how that size is obtained.
      BLOCK_SIZE bsize = BLOCK_64X64;
      if (use_fixed && !seg_skip) bsize = sf->always_this_block_size;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      if (!use_fixed)
        bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &unused_rate, &unused_dist, 1, td->pc_root);
    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
               cm->frame_type != KEY_FRAME) {
      choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &unused_rate, &unused_dist, 1, td->pc_root);
    } else {
      // If required set upper and lower partition size limits
      if (sf->auto_min_max_partition_size) {
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                &x->min_partition_size, &x->max_partition_size);
      }
      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                        &unused_rdc, INT64_MAX, td->pc_root);
    }

    cpi->row_mt_sync_write_ptr(&tile_data->row_mt_sync, sb_row, sb_col,
                               sb_cols_in_tile);
  }
}
3412 
// Prepares the main thread's macroblock state for encoding a frame: points
// the source planes at cpi->Source, configures the block planes for the
// frame's chroma subsampling and zeroes the above entropy and segmentation
// contexts.
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);
  vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);

  // Note: a single memset clears all planes, which relies on
  // above_context[0], [1] and [2] being allocated as part of the same buffer.
  memset(xd->above_context[0], 0,
         sizeof(*xd->above_context[0]) * 2 * sb_aligned_cols * MAX_MB_PLANE);
  memset(xd->above_seg_context, 0,
         sizeof(*xd->above_seg_context) * sb_aligned_cols);
}
3431 
// Returns 1 when at least two of the LAST/GOLDEN/ALTREF reference flags are
// set in cpi->ref_frame_flags, and 0 otherwise. Always returns 0 when the
// reference-frame segment feature is active on segment 1.
static int check_dual_ref_flags(VP9_COMP *cpi) {
  const int flags = cpi->ref_frame_flags;
  int enabled_refs = 0;

  if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) return 0;

  if (flags & VP9_GOLD_FLAG) ++enabled_refs;
  if (flags & VP9_LAST_FLAG) ++enabled_refs;
  if (flags & VP9_ALT_FLAG) ++enabled_refs;

  return enabled_refs >= 2;
}
3442 
// Clamps the transform size of every mode-info entry in the visible grid to
// at most max_tx_size.
static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
  MODE_INFO **const grid = cm->mi_grid_visible;
  const int stride = cm->mi_stride;
  int row;

  for (row = 0; row < cm->mi_rows; ++row) {
    MODE_INFO **const row_mi = grid + row * stride;
    int col;
    for (col = 0; col < cm->mi_cols; ++col) {
      MODE_INFO *const mi = row_mi[col];
      if (mi->tx_size > max_tx_size) mi->tx_size = max_tx_size;
    }
  }
}
3455 
get_frame_type(const VP9_COMP * cpi)3456 static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
3457   if (frame_is_intra_only(&cpi->common))
3458     return INTRA_FRAME;
3459   else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
3460     return ALTREF_FRAME;
3461   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
3462     return GOLDEN_FRAME;
3463   else
3464     return LAST_FRAME;
3465 }
3466 
select_tx_mode(const VP9_COMP * cpi,MACROBLOCKD * const xd)3467 static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
3468   if (xd->lossless) return ONLY_4X4;
3469   if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
3470     return ALLOW_16X16;
3471   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
3472     return ALLOW_32X32;
3473   else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
3474            cpi->sf.tx_size_search_method == USE_TX_8X8)
3475     return TX_MODE_SELECT;
3476   else
3477     return cpi->common.tx_mode;
3478 }
3479 
// Intra mode search that dispatches on block size: blocks of 16x16 and
// larger go through vp9_pick_intra_mode(), smaller blocks through
// vp9_rd_pick_intra_mode_sb() with no RD cost limit (INT64_MAX).
static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
                                     RD_COST *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  if (bsize >= BLOCK_16X16) {
    vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
    return;
  }
  vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
}
3488 
// Selects the coding mode of the block of size bsize at (mi_row, mi_col) for
// the non-RD path and stores the resulting rate and distortion in both
// *rd_cost and ctx. The above/left entropy contexts of every plane are saved
// before the mode search and restored afterwards.
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TileInfo *const tile_info = &tile_data->tile_info;
  ENTROPY_CONTEXT saved_left[16 * MAX_MB_PLANE];
  ENTROPY_CONTEXT saved_above[16 * MAX_MB_PLANE];
  // Contexts are handled in units of at least 8x8.
  BLOCK_SIZE unit_bsize = VPXMAX(bsize, BLOCK_8X8);
  const int ctx_w = num_4x4_blocks_wide_lookup[unit_bsize];
  const int ctx_h = num_4x4_blocks_high_lookup[unit_bsize];
  MODE_INFO *mi;
  int p;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // Back up the entropy contexts of each plane.
  for (p = 0; p < MAX_MB_PLANE; ++p) {
    struct macroblockd_plane *pd = &xd->plane[p];
    memcpy(saved_above + ctx_w * p, pd->above_context,
           (sizeof(saved_above[0]) * ctx_w) >> pd->subsampling_x);
    memcpy(saved_left + ctx_h * p, pd->left_context,
           (sizeof(saved_left[0]) * ctx_h) >> pd->subsampling_y);
  }

  // Boosted cyclic-refresh segments use their own RD multiplier.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(mi->segment_id)) {
    x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  }

  if (cm->frame_type == KEY_FRAME) {
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  } else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
    set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
  } else if (bsize < BLOCK_8X8) {
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
  } else {
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
  }

  duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);

  // Put the saved entropy contexts back.
  for (p = 0; p < MAX_MB_PLANE; ++p) {
    struct macroblockd_plane *pd = &xd->plane[p];
    memcpy(pd->above_context, saved_above + ctx_w * p,
           (sizeof(saved_above[0]) * ctx_w) >> pd->subsampling_x);
    memcpy(pd->left_context, saved_left + ctx_h * p,
           (sizeof(saved_left[0]) * ctx_h) >> pd->subsampling_y);
  }

  if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
3543 
fill_mode_info_sb(VP9_COMMON * cm,MACROBLOCK * x,int mi_row,int mi_col,BLOCK_SIZE bsize,PC_TREE * pc_tree)3544 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
3545                               int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
3546   MACROBLOCKD *xd = &x->e_mbd;
3547   int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
3548   PARTITION_TYPE partition = pc_tree->partitioning;
3549   BLOCK_SIZE subsize = get_subsize(bsize, partition);
3550 
3551   assert(bsize >= BLOCK_8X8);
3552 
3553   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
3554 
3555   switch (partition) {
3556     case PARTITION_NONE:
3557       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3558       *(xd->mi[0]) = pc_tree->none.mic;
3559       *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
3560       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
3561       break;
3562     case PARTITION_VERT:
3563       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3564       *(xd->mi[0]) = pc_tree->vertical[0].mic;
3565       *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
3566       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
3567 
3568       if (mi_col + hbs < cm->mi_cols) {
3569         set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
3570         *(xd->mi[0]) = pc_tree->vertical[1].mic;
3571         *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
3572         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
3573       }
3574       break;
3575     case PARTITION_HORZ:
3576       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3577       *(xd->mi[0]) = pc_tree->horizontal[0].mic;
3578       *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
3579       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
3580       if (mi_row + hbs < cm->mi_rows) {
3581         set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
3582         *(xd->mi[0]) = pc_tree->horizontal[1].mic;
3583         *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
3584         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
3585       }
3586       break;
3587     case PARTITION_SPLIT: {
3588       fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
3589       fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
3590                         pc_tree->split[1]);
3591       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
3592                         pc_tree->split[2]);
3593       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
3594                         pc_tree->split[3]);
3595       break;
3596     }
3597     default: break;
3598   }
3599 }
3600 
3601 // Reset the prediction pixel ready flag recursively.
pred_pixel_ready_reset(PC_TREE * pc_tree,BLOCK_SIZE bsize)3602 static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
3603   pc_tree->none.pred_pixel_ready = 0;
3604   pc_tree->horizontal[0].pred_pixel_ready = 0;
3605   pc_tree->horizontal[1].pred_pixel_ready = 0;
3606   pc_tree->vertical[0].pred_pixel_ready = 0;
3607   pc_tree->vertical[1].pred_pixel_ready = 0;
3608 
3609   if (bsize > BLOCK_8X8) {
3610     BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
3611     int i;
3612     for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
3613   }
3614 }
3615 
// Searches the partitioning of the square block |bsize| at (mi_row, mi_col)
// using the non-RD mode search. PARTITION_NONE, PARTITION_SPLIT (recursive),
// PARTITION_HORZ and PARTITION_VERT are tried in that order, subject to the
// frame-edge and speed-feature restrictions computed below. The candidate with
// the lowest rdcost is recorded in pc_tree->partitioning.
//   best_rd  - initial upper bound on rdcost; a candidate must beat it.
//   rd_cost  - receives the winning cost, or is reset when no candidate was
//              valid (rate == INT_MAX).
//   do_recon - when non-zero and a valid result exists, the block is encoded
//              through encode_sb_rt(); output is enabled only for BLOCK_64X64.
nonrd_pick_partition(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,TOKENEXTRA ** tp,int mi_row,int mi_col,BLOCK_SIZE bsize,RD_COST * rd_cost,int do_recon,int64_t best_rd,PC_TREE * pc_tree)3616 static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
3617                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
3618                                  int mi_row, int mi_col, BLOCK_SIZE bsize,
3619                                  RD_COST *rd_cost, int do_recon,
3620                                  int64_t best_rd, PC_TREE *pc_tree) {
3621   const SPEED_FEATURES *const sf = &cpi->sf;
3622   VP9_COMMON *const cm = &cpi->common;
3623   TileInfo *const tile_info = &tile_data->tile_info;
3624   MACROBLOCK *const x = &td->mb;
3625   MACROBLOCKD *const xd = &x->e_mbd;
       // Half of the block width; used below as an offset in mi_row/mi_col
       // units.
3626   const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
3627   TOKENEXTRA *tp_orig = *tp;
3628   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
3629   int i;
3630   BLOCK_SIZE subsize = bsize;
3631   RD_COST this_rdc, sum_rdc, best_rdc;
3632   int do_split = bsize >= BLOCK_8X8;
3633   int do_rect = 1;
3634   // Override skipping rectangular partition operations for edge blocks
       // (set when the second half of the block starts outside the frame).
3635   const int force_horz_split = (mi_row + ms >= cm->mi_rows);
3636   const int force_vert_split = (mi_col + ms >= cm->mi_cols);
3637   const int xss = x->e_mbd.plane[1].subsampling_x;
3638   const int yss = x->e_mbd.plane[1].subsampling_y;
3639 
3640   int partition_none_allowed = !force_horz_split && !force_vert_split;
3641   int partition_horz_allowed =
3642       !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
3643   int partition_vert_allowed =
3644       !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
       // tp_orig is otherwise referenced only by the asserts at the end of the
       // function; presumably this silences an unused-variable warning when
       // asserts are compiled out - confirm.
3645   (void)*tp_orig;
3646 
3647   assert(num_8x8_blocks_wide_lookup[bsize] ==
3648          num_8x8_blocks_high_lookup[bsize]);
3649 
3650   vp9_rd_cost_init(&sum_rdc);
3651   vp9_rd_cost_reset(&best_rdc);
3652   best_rdc.rdcost = best_rd;
3653 
3654   // Determine partition types in search according to the speed features.
3655   // The threshold set here has to be of square block size.
3656   if (sf->auto_min_max_partition_size) {
3657     partition_none_allowed &=
3658         (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
3659     partition_horz_allowed &=
3660         ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
3661          force_horz_split);
3662     partition_vert_allowed &=
3663         ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
3664          force_vert_split);
3665     do_split &= bsize > x->min_partition_size;
3666   }
3667   if (sf->use_square_partition_only) {
3668     partition_horz_allowed &= force_horz_split;
3669     partition_vert_allowed &= force_vert_split;
3670   }
3671 
       // Set only when neither a rectangular partition nor a split can be
       // searched for this block.
3672   ctx->pred_pixel_ready =
3673       !(partition_vert_allowed || partition_horz_allowed || do_split);
3674 
3675   // PARTITION_NONE
3676   if (partition_none_allowed) {
3677     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
3678                         ctx);
         // Snapshot the search result so it can be restored later by
         // fill_mode_info_sb().
3679     ctx->mic = *xd->mi[0];
3680     ctx->mbmi_ext = *x->mbmi_ext;
3681     ctx->skip_txfm[0] = x->skip_txfm[0];
3682     ctx->skip = x->skip;
3683 
3684     if (this_rdc.rate != INT_MAX) {
3685       int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3686       this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
3687       this_rdc.rdcost =
3688           RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
3689       if (this_rdc.rdcost < best_rdc.rdcost) {
3690         int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
3691         int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
3692 
             // Scale the breakout thresholds with the block size.
3693         dist_breakout_thr >>=
3694             8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
3695 
3696         rate_breakout_thr *= num_pels_log2_lookup[bsize];
3697 
3698         best_rdc = this_rdc;
3699         if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
3700 
             // Early termination: when the unsplit block is already below both
             // thresholds (and coding is not lossless), skip the split and
             // rectangular searches.
3701         if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
3702             this_rdc.dist < dist_breakout_thr) {
3703           do_split = 0;
3704           do_rect = 0;
3705         }
3706       }
3707     }
3708   }
3709 
3710   // store estimated motion vector
3711   store_pred_mv(x, ctx);
3712 
3713   // PARTITION_SPLIT
3714   if (do_split) {
3715     int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3716     sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
3717     sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3718     subsize = get_subsize(bsize, PARTITION_SPLIT);
         // The loop stops as soon as the accumulated cost is no longer below
         // the best cost found so far.
3719     for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
3720       const int x_idx = (i & 1) * ms;
3721       const int y_idx = (i >> 1) * ms;
3722 
           // Quadrants that start outside the frame are skipped.
3723       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3724         continue;
3725       load_pred_mv(x, ctx);
           // The child searches without reconstruction (do_recon == 0) and
           // with the remaining cost budget as its bound.
3726       nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3727                            mi_col + x_idx, subsize, &this_rdc, 0,
3728                            best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
3729 
3730       if (this_rdc.rate == INT_MAX) {
3731         vp9_rd_cost_reset(&sum_rdc);
3732       } else {
3733         sum_rdc.rate += this_rdc.rate;
3734         sum_rdc.dist += this_rdc.dist;
3735         sum_rdc.rdcost += this_rdc.rdcost;
3736       }
3737     }
3738 
3739     if (sum_rdc.rdcost < best_rdc.rdcost) {
3740       best_rdc = sum_rdc;
3741       pc_tree->partitioning = PARTITION_SPLIT;
3742     } else {
3743       // skip rectangular partition test when larger block size
3744       // gives better rd cost
3745       if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
3746     }
3747   }
3748 
3749   // PARTITION_HORZ
3750   if (partition_horz_allowed && do_rect) {
3751     subsize = get_subsize(bsize, PARTITION_HORZ);
3752     if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
3753     pc_tree->horizontal[0].pred_pixel_ready = 1;
         // The first half's cost is written directly into sum_rdc.
3754     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3755                         &pc_tree->horizontal[0]);
3756 
3757     pc_tree->horizontal[0].mic = *xd->mi[0];
3758     pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
3759     pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
3760     pc_tree->horizontal[0].skip = x->skip;
3761 
         // The second half is searched only if the first half is still
         // competitive and the second half starts inside the frame.
3762     if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
3763       load_pred_mv(x, ctx);
3764       pc_tree->horizontal[1].pred_pixel_ready = 1;
3765       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
3766                           subsize, &pc_tree->horizontal[1]);
3767 
3768       pc_tree->horizontal[1].mic = *xd->mi[0];
3769       pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
3770       pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
3771       pc_tree->horizontal[1].skip = x->skip;
3772 
3773       if (this_rdc.rate == INT_MAX) {
3774         vp9_rd_cost_reset(&sum_rdc);
3775       } else {
3776         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
             // The partition signalling cost reaches sum_rdc through this_rdc.
3777         this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
3778         sum_rdc.rate += this_rdc.rate;
3779         sum_rdc.dist += this_rdc.dist;
3780         sum_rdc.rdcost =
3781             RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3782       }
3783     }
3784 
3785     if (sum_rdc.rdcost < best_rdc.rdcost) {
3786       best_rdc = sum_rdc;
3787       pc_tree->partitioning = PARTITION_HORZ;
3788     } else {
3789       pred_pixel_ready_reset(pc_tree, bsize);
3790     }
3791   }
3792 
3793   // PARTITION_VERT
3794   if (partition_vert_allowed && do_rect) {
3795     subsize = get_subsize(bsize, PARTITION_VERT);
3796     if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
3797     pc_tree->vertical[0].pred_pixel_ready = 1;
         // The first half's cost is written directly into sum_rdc.
3798     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3799                         &pc_tree->vertical[0]);
3800     pc_tree->vertical[0].mic = *xd->mi[0];
3801     pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
3802     pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
3803     pc_tree->vertical[0].skip = x->skip;
3804 
         // The second half is searched only if the first half is still
         // competitive and the second half starts inside the frame.
3805     if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
3806       load_pred_mv(x, ctx);
3807       pc_tree->vertical[1].pred_pixel_ready = 1;
3808       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
3809                           subsize, &pc_tree->vertical[1]);
3810       pc_tree->vertical[1].mic = *xd->mi[0];
3811       pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
3812       pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
3813       pc_tree->vertical[1].skip = x->skip;
3814 
3815       if (this_rdc.rate == INT_MAX) {
3816         vp9_rd_cost_reset(&sum_rdc);
3817       } else {
3818         int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3819         sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
3820         sum_rdc.rate += this_rdc.rate;
3821         sum_rdc.dist += this_rdc.dist;
3822         sum_rdc.rdcost =
3823             RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3824       }
3825     }
3826 
3827     if (sum_rdc.rdcost < best_rdc.rdcost) {
3828       best_rdc = sum_rdc;
3829       pc_tree->partitioning = PARTITION_VERT;
3830     } else {
3831       pred_pixel_ready_reset(pc_tree, bsize);
3832     }
3833   }
3834 
3835   *rd_cost = best_rdc;
3836 
       // No candidate produced a valid rate: report a reset cost and leave the
       // mode-info array untouched.
3837   if (best_rdc.rate == INT_MAX) {
3838     vp9_rd_cost_reset(rd_cost);
3839     return;
3840   }
3841 
3842   // update mode info array
3843   fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);
3844 
3845   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
3846     int output_enabled = (bsize == BLOCK_64X64);
3847     encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
3848                  pc_tree);
3849   }
3850 
       // Tokens are expected to have been emitted only for a reconstructed
       // 64x64 block; in every other case *tp must be unchanged.
3851   if (bsize == BLOCK_64X64 && do_recon) {
3852     assert(tp_orig < *tp);
3853     assert(best_rdc.rate < INT_MAX);
3854     assert(best_rdc.dist < INT64_MAX);
3855   } else {
3856     assert(tp_orig == *tp);
3857   }
3858 }
3859 
nonrd_select_partition(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,MODE_INFO ** mi,TOKENEXTRA ** tp,int mi_row,int mi_col,BLOCK_SIZE bsize,int output_enabled,RD_COST * rd_cost,PC_TREE * pc_tree)3860 static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
3861                                    TileDataEnc *tile_data, MODE_INFO **mi,
3862                                    TOKENEXTRA **tp, int mi_row, int mi_col,
3863                                    BLOCK_SIZE bsize, int output_enabled,
3864                                    RD_COST *rd_cost, PC_TREE *pc_tree) {
3865   VP9_COMMON *const cm = &cpi->common;
3866   TileInfo *const tile_info = &tile_data->tile_info;
3867   MACROBLOCK *const x = &td->mb;
3868   MACROBLOCKD *const xd = &x->e_mbd;
3869   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
3870   const int mis = cm->mi_stride;
3871   PARTITION_TYPE partition;
3872   BLOCK_SIZE subsize;
3873   RD_COST this_rdc;
3874 
3875   vp9_rd_cost_reset(&this_rdc);
3876   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
3877 
3878   subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
3879   partition = partition_lookup[bsl][subsize];
3880 
3881   if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
3882     x->max_partition_size = BLOCK_32X32;
3883     x->min_partition_size = BLOCK_16X16;
3884     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3885                          0, INT64_MAX, pc_tree);
3886   } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
3887              subsize >= BLOCK_16X16) {
3888     x->max_partition_size = BLOCK_32X32;
3889     x->min_partition_size = BLOCK_8X8;
3890     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3891                          0, INT64_MAX, pc_tree);
3892   } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
3893     x->max_partition_size = BLOCK_16X16;
3894     x->min_partition_size = BLOCK_8X8;
3895     nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3896                          0, INT64_MAX, pc_tree);
3897   } else {
3898     switch (partition) {
3899       case PARTITION_NONE:
3900         pc_tree->none.pred_pixel_ready = 1;
3901         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3902                             &pc_tree->none);
3903         pc_tree->none.mic = *xd->mi[0];
3904         pc_tree->none.mbmi_ext = *x->mbmi_ext;
3905         pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
3906         pc_tree->none.skip = x->skip;
3907         break;
3908       case PARTITION_VERT:
3909         pc_tree->vertical[0].pred_pixel_ready = 1;
3910         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3911                             &pc_tree->vertical[0]);
3912         pc_tree->vertical[0].mic = *xd->mi[0];
3913         pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
3914         pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
3915         pc_tree->vertical[0].skip = x->skip;
3916         if (mi_col + hbs < cm->mi_cols) {
3917           pc_tree->vertical[1].pred_pixel_ready = 1;
3918           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
3919                               &this_rdc, subsize, &pc_tree->vertical[1]);
3920           pc_tree->vertical[1].mic = *xd->mi[0];
3921           pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
3922           pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
3923           pc_tree->vertical[1].skip = x->skip;
3924           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3925               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3926             rd_cost->rate += this_rdc.rate;
3927             rd_cost->dist += this_rdc.dist;
3928           }
3929         }
3930         break;
3931       case PARTITION_HORZ:
3932         pc_tree->horizontal[0].pred_pixel_ready = 1;
3933         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3934                             &pc_tree->horizontal[0]);
3935         pc_tree->horizontal[0].mic = *xd->mi[0];
3936         pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
3937         pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
3938         pc_tree->horizontal[0].skip = x->skip;
3939         if (mi_row + hbs < cm->mi_rows) {
3940           pc_tree->horizontal[1].pred_pixel_ready = 1;
3941           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
3942                               &this_rdc, subsize, &pc_tree->horizontal[1]);
3943           pc_tree->horizontal[1].mic = *xd->mi[0];
3944           pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
3945           pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
3946           pc_tree->horizontal[1].skip = x->skip;
3947           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3948               rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3949             rd_cost->rate += this_rdc.rate;
3950             rd_cost->dist += this_rdc.dist;
3951           }
3952         }
3953         break;
3954       case PARTITION_SPLIT:
3955         subsize = get_subsize(bsize, PARTITION_SPLIT);
3956         nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3957                                subsize, output_enabled, rd_cost,
3958                                pc_tree->split[0]);
3959         nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
3960                                mi_col + hbs, subsize, output_enabled, &this_rdc,
3961                                pc_tree->split[1]);
3962         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3963             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3964           rd_cost->rate += this_rdc.rate;
3965           rd_cost->dist += this_rdc.dist;
3966         }
3967         nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
3968                                mi_row + hbs, mi_col, subsize, output_enabled,
3969                                &this_rdc, pc_tree->split[2]);
3970         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3971             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3972           rd_cost->rate += this_rdc.rate;
3973           rd_cost->dist += this_rdc.dist;
3974         }
3975         nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
3976                                mi_row + hbs, mi_col + hbs, subsize,
3977                                output_enabled, &this_rdc, pc_tree->split[3]);
3978         if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3979             rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3980           rd_cost->rate += this_rdc.rate;
3981           rd_cost->dist += this_rdc.dist;
3982         }
3983         break;
3984       default: assert(0 && "Invalid partition type."); break;
3985     }
3986   }
3987 
3988   if (bsize == BLOCK_64X64 && output_enabled)
3989     encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
3990 }
3991 
nonrd_use_partition(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,MODE_INFO ** mi,TOKENEXTRA ** tp,int mi_row,int mi_col,BLOCK_SIZE bsize,int output_enabled,RD_COST * dummy_cost,PC_TREE * pc_tree)3992 static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
3993                                 TileDataEnc *tile_data, MODE_INFO **mi,
3994                                 TOKENEXTRA **tp, int mi_row, int mi_col,
3995                                 BLOCK_SIZE bsize, int output_enabled,
3996                                 RD_COST *dummy_cost, PC_TREE *pc_tree) {
3997   VP9_COMMON *const cm = &cpi->common;
3998   TileInfo *tile_info = &tile_data->tile_info;
3999   MACROBLOCK *const x = &td->mb;
4000   MACROBLOCKD *const xd = &x->e_mbd;
4001   const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
4002   const int mis = cm->mi_stride;
4003   PARTITION_TYPE partition;
4004   BLOCK_SIZE subsize;
4005 
4006   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
4007 
4008   subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
4009   partition = partition_lookup[bsl][subsize];
4010 
4011   if (output_enabled && bsize != BLOCK_4X4) {
4012     int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
4013     td->counts->partition[ctx][partition]++;
4014   }
4015 
4016   switch (partition) {
4017     case PARTITION_NONE:
4018       pc_tree->none.pred_pixel_ready = 1;
4019       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
4020                           subsize, &pc_tree->none);
4021       pc_tree->none.mic = *xd->mi[0];
4022       pc_tree->none.mbmi_ext = *x->mbmi_ext;
4023       pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
4024       pc_tree->none.skip = x->skip;
4025       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
4026                   subsize, &pc_tree->none);
4027       break;
4028     case PARTITION_VERT:
4029       pc_tree->vertical[0].pred_pixel_ready = 1;
4030       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
4031                           subsize, &pc_tree->vertical[0]);
4032       pc_tree->vertical[0].mic = *xd->mi[0];
4033       pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
4034       pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
4035       pc_tree->vertical[0].skip = x->skip;
4036       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
4037                   subsize, &pc_tree->vertical[0]);
4038       if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
4039         pc_tree->vertical[1].pred_pixel_ready = 1;
4040         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
4041                             subsize, &pc_tree->vertical[1]);
4042         pc_tree->vertical[1].mic = *xd->mi[0];
4043         pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
4044         pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
4045         pc_tree->vertical[1].skip = x->skip;
4046         encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
4047                     output_enabled, subsize, &pc_tree->vertical[1]);
4048       }
4049       break;
4050     case PARTITION_HORZ:
4051       pc_tree->horizontal[0].pred_pixel_ready = 1;
4052       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
4053                           subsize, &pc_tree->horizontal[0]);
4054       pc_tree->horizontal[0].mic = *xd->mi[0];
4055       pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
4056       pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
4057       pc_tree->horizontal[0].skip = x->skip;
4058       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
4059                   subsize, &pc_tree->horizontal[0]);
4060 
4061       if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
4062         pc_tree->horizontal[1].pred_pixel_ready = 1;
4063         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
4064                             subsize, &pc_tree->horizontal[1]);
4065         pc_tree->horizontal[1].mic = *xd->mi[0];
4066         pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
4067         pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
4068         pc_tree->horizontal[1].skip = x->skip;
4069         encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
4070                     output_enabled, subsize, &pc_tree->horizontal[1]);
4071       }
4072       break;
4073     case PARTITION_SPLIT:
4074       subsize = get_subsize(bsize, PARTITION_SPLIT);
4075       if (bsize == BLOCK_8X8) {
4076         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
4077                             subsize, pc_tree->leaf_split[0]);
4078         encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
4079                     subsize, pc_tree->leaf_split[0]);
4080       } else {
4081         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
4082                             output_enabled, dummy_cost, pc_tree->split[0]);
4083         nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
4084                             mi_col + hbs, subsize, output_enabled, dummy_cost,
4085                             pc_tree->split[1]);
4086         nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
4087                             mi_row + hbs, mi_col, subsize, output_enabled,
4088                             dummy_cost, pc_tree->split[2]);
4089         nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
4090                             mi_row + hbs, mi_col + hbs, subsize, output_enabled,
4091                             dummy_cost, pc_tree->split[3]);
4092       }
4093       break;
4094     default: assert(0 && "Invalid partition type."); break;
4095   }
4096 
4097   if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
4098     update_partition_context(xd, mi_row, mi_col, subsize, bsize);
4099 }
4100 
// Encodes one row of 64x64 superblocks of a tile using the non-RD path.
// For every superblock from the tile's mi_col_start up to mi_col_end (stepping
// by MI_BLOCK_SIZE), per-superblock state in the macroblock is reset, a
// partitioning is chosen according to sf->partition_search_type, and the
// superblock is coded. Each superblock is bracketed by the row_mt_sync
// read/write callbacks held in |cpi|.
//   mi_row - row of the superblock row, in mode-info units.
//   tp     - token output pointer, advanced by the encode calls.
encode_nonrd_sb_row(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,int mi_row,TOKENEXTRA ** tp)4101 static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
4102                                 TileDataEnc *tile_data, int mi_row,
4103                                 TOKENEXTRA **tp) {
4104   SPEED_FEATURES *const sf = &cpi->sf;
4105   VP9_COMMON *const cm = &cpi->common;
4106   TileInfo *const tile_info = &tile_data->tile_info;
4107   MACROBLOCK *const x = &td->mb;
4108   MACROBLOCKD *const xd = &x->e_mbd;
4109   const int mi_col_start = tile_info->mi_col_start;
4110   const int mi_col_end = tile_info->mi_col_end;
4111   int mi_col;
4112   const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
4113   const int num_sb_cols =
4114       get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
4115   int sb_col_in_tile;
4116 
4117   // Initialize the left context for the new SB row
4118   memset(&xd->left_context, 0, sizeof(xd->left_context));
4119   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
4120 
4121   // Code each SB in the row
4122   for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
4123        mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
4124     const struct segmentation *const seg = &cm->seg;
4125     RD_COST dummy_rdc;
4126     const int idx_str = cm->mi_stride * mi_row + mi_col;
4127     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
4128     PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
4129     BLOCK_SIZE bsize = BLOCK_64X64;
4130     int seg_skip = 0;
4131 
         // Row-MT read hook, called before this superblock is coded;
         // presumably it waits on the superblock row above - confirm against
         // the callback's implementation.
4132     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4133                                    sb_col_in_tile);
4134 
         // Reset the per-superblock state kept in |x|.
4135     x->source_variance = UINT_MAX;
4136     vp9_zero(x->pred_mv);
4137     vp9_rd_cost_init(&dummy_rdc);
4138     x->color_sensitivity[0] = 0;
4139     x->color_sensitivity[1] = 0;
4140     x->sb_is_skin = 0;
4141     x->skip_low_source_sad = 0;
4142     x->lowvar_highsumdiff = 0;
4143     x->content_state_sb = 0;
4144 
         // A superblock whose segment has the skip feature active is always
         // coded with a fixed partition.
4145     if (seg->enabled) {
4146       const uint8_t *const map =
4147           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
4148       int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
4149       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4150       if (seg_skip) {
4151         partition_search_type = FIXED_PARTITION;
4152       }
4153     }
4154 
4155     if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
           // |shift| is the luma sample offset of this superblock in the
           // source frame (mode-info units are scaled by 8); |sb_offset2| is
           // the superblock's index in a raster of (mi_cols + 7) >> 3 columns.
4156       int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
4157       int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
4158       avg_source_sad(cpi, x, shift, sb_offset2);
4159     }
4160 
4161     // Set the partition type of the 64X64 block
4162     switch (partition_search_type) {
4163       case VAR_BASED_PARTITION:
4164         // TODO(jingning, marpan): The mode decision and encoding process
4165         // support both intra and inter sub8x8 block coding for RTC mode.
4166         // Tune the thresholds accordingly to use sub8x8 block coding for
4167         // coding performance improvement.
4168         choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
4169         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
4170                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
4171         break;
4172       case SOURCE_VAR_BASED_PARTITION:
4173         set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
4174         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
4175                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
4176         break;
4177       case FIXED_PARTITION:
             // With segment skip the block size stays at 64x64; otherwise the
             // speed feature's fixed block size is used.
4178         if (!seg_skip) bsize = sf->always_this_block_size;
4179         set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4180         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
4181                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
4182         break;
4183       case REFERENCE_PARTITION:
4184         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
4185         // Use nonrd_pick_partition on scene-cut for VBR mode.
4186         // nonrd_pick_partition does not support 4x4 partition, so avoid it
4187         // on key frame for now.
4188         if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
4189              cm->frame_type != KEY_FRAME)) {
4190           // Use lower max_partition_size for low resolutions.
4191           if (cm->width <= 352 && cm->height <= 288)
4192             x->max_partition_size = BLOCK_32X32;
4193           else
4194             x->max_partition_size = BLOCK_64X64;
4195           x->min_partition_size = BLOCK_8X8;
4196           nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
4197                                BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
4198                                td->pc_root);
4199         } else {
4200           choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
4201           // TODO(marpan): Seems like nonrd_select_partition does not support
4202           // 4x4 partition. Since 4x4 is used on key frame, use this switch
4203           // for now.
4204           if (cm->frame_type == KEY_FRAME)
4205             nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
4206                                 BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
4207           else
4208             nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
4209                                    BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
4210         }
4211 
4212         break;
4213       default: assert(0); break;
4214     }
4215 
         // Row-MT write hook, called once this superblock has been coded.
4216     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4217                                     sb_col_in_tile, num_sb_cols);
4218   }
4219 }
4220 // end RTC play code
4221 
// Measures, for every 16x16 macroblock, the variance of the difference between
// the current source luma plane and the previous source luma plane, stores the
// per-block results in cpi->source_diff_var, and histograms the variances.
// cpi->source_var_thresh is then derived from the histogram.
//
// Returns 0 when a threshold was selected, -1 when a high-bitdepth build sees
// an unsupported bit depth, and sf->search_type_check_frequency otherwise (in
// which case cpi->source_var_thresh is left at 0).
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const VP9_COMMON *const cm = &cpi->common;

  // Start-of-row pointers; advanced by 16 pixel rows per macroblock row.
  const uint8_t *src_row = cpi->Source->y_buffer;
  const uint8_t *last_src_row = cpi->Last_Source->y_buffer;
  const int src_stride = cpi->Source->y_stride;
  const int last_stride = cpi->Last_Source->y_stride;

  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
  diff *var16 = cpi->source_diff_var;

  int cutoff;
  int running_total = 0;
  int mb_row, mb_col, bin;

  // Pick cutoff threshold: a percentage of the macroblock count that depends
  // on whether the smaller frame dimension reaches 720.
  if (VPXMIN(cm->width, cm->height) >= 720)
    cutoff = cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100;
  else
    cutoff = cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100;

  memset(hist, 0, sizeof(hist));

  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
    const uint8_t *src = src_row;
    const uint8_t *last_src = last_src_row;

    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (!cm->use_highbitdepth) {
        vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                        &var16->sum);
      } else {
        switch (cm->bit_depth) {
          case VPX_BITS_8:
            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &var16->sse, &var16->sum);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            break;
          case VPX_BITS_12:
            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            break;
          default:
            assert(0 &&
                   "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
                   " or VPX_BITS_12");
            return -1;
        }
      }
#else
      vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                      &var16->sum);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      // variance = sse - sum^2 / 256 (256 pixels per 16x16 block).
      var16->var = var16->sse - (((uint32_t)var16->sum * var16->sum) >> 8);

      // Everything at or above VAR_HIST_MAX_BG_VAR shares the last bin.
      if (var16->var < VAR_HIST_MAX_BG_VAR)
        ++hist[var16->var / VAR_HIST_FACTOR];
      else
        ++hist[VAR_HIST_BINS - 1];

      src += 16;
      last_src += 16;
      var16++;
    }

    src_row += 16 * src_stride;
    last_src_row += 16 * last_stride;
  }

  cpi->source_var_thresh = 0;

  // Too many blocks in the top bin: no threshold is chosen.
  if (hist[VAR_HIST_BINS - 1] >= cutoff) return sf->search_type_check_frequency;

  // Walk the bins from low variance up until the cumulative count passes the
  // cutoff; the threshold is the upper edge of that bin.
  for (bin = 0; bin < VAR_HIST_BINS - 1; bin++) {
    running_total += hist[bin];
    if (running_total > cutoff) {
      cpi->source_var_thresh = (bin + 1) * VAR_HIST_FACTOR;
      return 0;
    }
  }

  return sf->search_type_check_frequency;
}
4305 
// Chooses sf->partition_search_type for the current frame when source-variance
// based partitioning is in use:
//   - key frames use SEARCH_PARTITION,
//   - intra-only frames use FIXED_PARTITION,
//   - other frames use FIXED_PARTITION while cpi->frames_till_next_var_check
//     is positive; that counter is refreshed from the variance histogram
//     whenever it has reached zero.
// The per-macroblock variance buffer is reallocated when the frame size
// differs from the previous frame's size.
static void source_var_based_partition_search_method(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;

  if (cm->frame_type == KEY_FRAME) {
    // For key frame, use SEARCH_PARTITION.
    sf->partition_search_type = SEARCH_PARTITION;
    return;
  }

  if (cm->intra_only) {
    sf->partition_search_type = FIXED_PARTITION;
    return;
  }

  if (!(cm->last_width == cm->width && cm->last_height == cm->height)) {
    if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);

    CHECK_MEM_ERROR(cm, cpi->source_diff_var,
                    vpx_calloc(cm->MBs, sizeof(diff)));
  }

  if (cpi->frames_till_next_var_check == 0)
    cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);

  if (cpi->frames_till_next_var_check > 0) {
    sf->partition_search_type = FIXED_PARTITION;
    cpi->frames_till_next_var_check--;
  }
}
4332 
// Returns 1 when the accumulated intra count, multiplied by four, is still
// below the accumulated inter count, the frame is not a key frame, and the
// frame is shown; returns 0 otherwise.
static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
  unsigned int num_intra = 0;
  unsigned int num_inter = 0;
  int ctx;

  for (ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    num_intra += td->counts->intra_inter[ctx][0];
    num_inter += td->counts->intra_inter[ctx][1];
  }

  if ((num_intra << 2) >= num_inter) return 0;
  if (cm->frame_type == KEY_FRAME) return 0;
  return cm->show_frame ? 1 : 0;
}
4345 
// Prepares cpi->tile_data for the current tile layout.
//
// The tile array is (re)allocated when it is missing or smaller than the
// number of tiles, and in that case each tile's threshold factors and mode map
// are reset to their initial values. For every tile, the tile bounds are then
// initialized and the tile's token buffer and token-list pointers are laid out
// back to back, starting from cpi->tile_tok[0][0] and cpi->tplist[0][0].
void vp9_init_tile_data(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int num_tiles = tile_cols * tile_rows;
  TOKENEXTRA *tok_cursor = cpi->tile_tok[0][0];
  TOKENLIST *tplist_cursor = cpi->tplist[0][0];
  int prev_tile_tokens = 0;
  int prev_tile_sb_rows = 0;
  int tile_col, tile_row;

  if (cpi->tile_data == NULL || cpi->allocated_tiles < num_tiles) {
    int tile_idx;

    if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
    CHECK_MEM_ERROR(cm, cpi->tile_data,
                    vpx_malloc(num_tiles * sizeof(*cpi->tile_data)));
    cpi->allocated_tiles = num_tiles;

    // Freshly allocated tiles start from the default RD threshold state.
    for (tile_idx = 0; tile_idx < num_tiles; ++tile_idx) {
      TileDataEnc *const tile_data = &cpi->tile_data[tile_idx];
      int bs, mode;

      for (bs = 0; bs < BLOCK_SIZES; ++bs) {
        for (mode = 0; mode < MAX_MODES; ++mode) {
          tile_data->thresh_freq_fact[bs][mode] = RD_THRESH_INIT_FACT;
          tile_data->mode_map[bs][mode] = mode;
        }
      }
#if CONFIG_MULTITHREAD
      tile_data->row_base_thresh_freq_fact = NULL;
#endif
    }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const this_tile =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &this_tile->tile_info;

      vp9_tile_init(tile_info, cm, tile_row, tile_col);

      // Each tile's storage begins where the previous tile's share ends.
      tok_cursor += prev_tile_tokens;
      cpi->tile_tok[tile_row][tile_col] = tok_cursor;
      prev_tile_tokens = allocated_tokens(*tile_info);

      tplist_cursor += prev_tile_sb_rows;
      cpi->tplist[tile_row][tile_col] = tplist_cursor;
      prev_tile_sb_rows = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
    }
  }
}
4395 
// Encodes the superblock row that starts at mi_row inside tile
// (tile_row, tile_col), using the non-RD or RD row encoder depending on
// sf.use_nonrd_pick_mode, and records the start, stop and count of the tokens
// produced for that row in the tile's token list.
void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;
  // Index of this superblock row relative to the top of the tile.
  const int tile_sb_row =
      mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>
      MI_BLOCK_SIZE_LOG2;
  TOKENLIST *const row_tokens = &cpi->tplist[tile_row][tile_col][tile_sb_row];
  TOKENEXTRA *tok = NULL;

  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
  row_tokens->start = tok;

  if (cpi->sf.use_nonrd_pick_mode)
    encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
  else
    encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);

  row_tokens->stop = tok;
  row_tokens->count = (unsigned int)(row_tokens->stop - row_tokens->start);
  assert(tok - row_tokens->start <=
         get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));

  // Only referenced by the assert above; silence unused warnings otherwise.
  (void)tile_mb_cols;
}
4425 
// Encodes every superblock row of tile (tile_row, tile_col), top to bottom.
void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const TileInfo *const tile_info =
      &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
  const int first_mi_row = tile_info->mi_row_start;
  const int end_mi_row = tile_info->mi_row_end;
  int mi_row = first_mi_row;

  while (mi_row < end_mi_row) {
    vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
    mi_row += MI_BLOCK_SIZE;
  }
}
4439 
// Initializes the tile data and then encodes all tiles one after another, in
// row-major order, using the encoder's own thread data (cpi->td).
static void encode_tiles(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int num_tiles = tile_rows * tile_cols;
  int tile_idx;

  vp9_init_tile_data(cpi);

  for (tile_idx = 0; tile_idx < num_tiles; ++tile_idx)
    vp9_encode_tile(cpi, &cpi->td, tile_idx / tile_cols, tile_idx % tile_cols);
}
4452 
4453 #if CONFIG_FP_MB_STATS
// Points *this_frame_mb_stats at the first-pass per-macroblock stats of the
// current frame (one byte per macroblock, indexed by cm->current_video_frame).
// Returns 1 on success, or EOF without touching *this_frame_mb_stats when the
// computed position lies beyond firstpass_mb_stats->mb_stats_end.
static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  uint8_t *const frame_stats =
      firstpass_mb_stats->mb_stats_start +
      cm->current_video_frame * cm->MBs * sizeof(uint8_t);

  if (frame_stats <= firstpass_mb_stats->mb_stats_end) {
    *this_frame_mb_stats = frame_stats;
    return 1;
  }

  return EOF;
}
4465 #endif
4466 
// Runs one encoding pass over the whole frame.
//
// Resets the per-frame counters, selects the 4x4 forward/inverse transform
// functions according to whether the frame is lossless, initializes the
// quantizer, RD and motion-estimation constants, decides whether previous
// frame motion vectors may be used, performs the extra setup needed for
// non-RD mode, and finally dispatches tile encoding (single-threaded,
// tile-threaded or row-threaded). The time spent encoding is added to
// cpi->time_encode_sb_row.
static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  vp9_zero(*td->counts);
  vp9_zero(td->rd_counts);

  // Lossless requires a zero base q index and zero delta-q values.
  xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;

  if (xd->lossless) {
    // Lossless frames use the Walsh-Hadamard transform pair.
    x->fwd_txm4x4 = vp9_fwht4x4;
    x->itxm_add = vp9_iwht4x4_add;
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) x->fwd_txm4x4 = vp9_highbd_fwht4x4;
    x->highbd_itxm_add = vp9_highbd_iwht4x4_add;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->optimize = 0;
  } else {
    x->fwd_txm4x4 = vpx_fdct4x4;
    x->itxm_add = vp9_idct4x4_add;
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) x->fwd_txm4x4 = vpx_highbd_fdct4x4;
    x->highbd_itxm_add = vp9_highbd_idct4x4_add;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);

  cm->use_prev_frame_mvs =
      !cm->error_resilient_mode && cm->width == cm->last_width &&
      cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;

  // Special case: set prev_mi to NULL when the previous mode info
  // context cannot be used.
  if (cm->use_prev_frame_mvs)
    cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
  else
    cm->prev_mi = NULL;

  x->quant_fp = sf->use_quant_fp;
  vp9_zero(x->skip_txfm);

  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &td->pc_root->none;
    const int lagged_vbr =
        cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR;
    int plane;

    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
      p[plane].coeff = ctx->coeff_pbuf[plane][0];
      p[plane].qcoeff = ctx->qcoeff_pbuf[plane][0];
      pd[plane].dqcoeff = ctx->dqcoeff_pbuf[plane][0];
      p[plane].eobs = ctx->eobs_pbuf[plane][0];
    }
    vp9_zero(x->zcoeff_blk);

    // Clear the golden reference flag on a non-key frame whose
    // frames_since_golden count is zero, unless lagged VBR or SVC is in use.
    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
        !lagged_vbr && !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  }

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    if (cpi->row_mt) {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
      vp9_encode_tiles_row_mt(cpi);
    } else {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
      // If allowed, encoding tiles in parallel with one thread handling one
      // tile when row based multi-threading is disabled.
      if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
        vp9_encode_tiles_mt(cpi);
      else
        encode_tiles(cpi);
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  if (sf->skip_encode_sb)
    sf->skip_encode_frame = get_skip_encode_frame(cm, td);
  else
    sf->skip_encode_frame = 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
4577 
get_interp_filter(const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS],int is_alt_ref)4578 static INTERP_FILTER get_interp_filter(
4579     const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
4580   if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
4581       threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
4582       threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
4583     return EIGHTTAP_SMOOTH;
4584   } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
4585              threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
4586     return EIGHTTAP_SHARP;
4587   } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
4588     return EIGHTTAP;
4589   } else {
4590     return SWITCHABLE;
4591   }
4592 }
4593 
compute_frame_aq_offset(struct VP9_COMP * cpi)4594 static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
4595   VP9_COMMON *const cm = &cpi->common;
4596   MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
4597   struct segmentation *const seg = &cm->seg;
4598 
4599   int mi_row, mi_col;
4600   int sum_delta = 0;
4601   int map_index = 0;
4602   int qdelta_index;
4603   int segment_id;
4604 
4605   for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
4606     MODE_INFO **mi_8x8 = mi_8x8_ptr;
4607     for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
4608       segment_id = mi_8x8[0]->segment_id;
4609       qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
4610       sum_delta += qdelta_index;
4611       map_index++;
4612     }
4613     mi_8x8_ptr += cm->mi_stride;
4614   }
4615 
4616   return sum_delta / (cm->mi_rows * cm->mi_cols);
4617 }
4618 
// Top-level frame encode.
//
// Decides whether compound inter prediction is allowed, then encodes the
// frame. When sf.frame_parameter_update is set, the frame-level reference mode
// and interpolation filter are picked from running thresholds before encoding,
// the thresholds are updated from the RD counters afterwards, and the
// reference mode / transform mode are narrowed when the collected counts show
// that only one choice was actually used. Otherwise the frame is encoded with
// SINGLE_REFERENCE. Finally the average AQ offset is computed when segmented
// AQ is active and the segmentation map or data was updated.
void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    const int alt_bias = cm->ref_frame_sign_bias[ALTREF_FRAME];
    const int alt_differs_from_both =
        alt_bias != cm->ref_frame_sign_bias[GOLDEN_FRAME] &&
        alt_bias != cm->ref_frame_sign_bias[LAST_FRAME];

    if (alt_differs_from_both) {
      cpi->allow_comp_inter_inter = 1;
      cm->comp_fixed_ref = ALTREF_FRAME;
      cm->comp_var_ref[0] = LAST_FRAME;
      cm->comp_var_ref[1] = GOLDEN_FRAME;
    } else {
      cpi->allow_comp_inter_inter = 0;
    }
  }

  if (!cpi->sf.frame_parameter_update) {
    cm->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  } else {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It also does the same analysis for transform size selection.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    if (is_alt_ref || !cpi->allow_comp_inter_inter) {
      cm->reference_mode = SINGLE_REFERENCE;
    } else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
               mode_thrs[COMPOUND_REFERENCE] >
                   mode_thrs[REFERENCE_MODE_SELECT] &&
               check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) {
      cm->reference_mode = COMPOUND_REFERENCE;
    } else if (mode_thrs[SINGLE_REFERENCE] >
               mode_thrs[REFERENCE_MODE_SELECT]) {
      cm->reference_mode = SINGLE_REFERENCE;
    } else {
      cm->reference_mode = REFERENCE_MODE_SELECT;
    }

    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

    // Blend each threshold half-and-half with this frame's per-MB RD diff.
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int num_single = 0;
      int num_comp = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        num_single += counts->comp_inter[i][0];
        num_comp += counts->comp_inter[i][1];
      }

      // If only one kind of prediction was ever chosen, signal it at the
      // frame level and drop the now meaningless per-block counts.
      if (num_comp == 0 || num_single == 0) {
        cm->reference_mode =
            (num_comp == 0) ? SINGLE_REFERENCE : COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

    if (cm->tx_mode == TX_MODE_SELECT) {
      // "_lp" counters: the size was chosen where a larger one was allowed.
      int n4x4 = 0;
      int n8x8_lp = 0, n8x8_max = 0;
      int n16x16_lp = 0, n16x16_max = 0;
      int n32x32 = 0;

      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        n4x4 += counts->tx.p32x32[i][TX_4X4];
        n4x4 += counts->tx.p16x16[i][TX_4X4];
        n4x4 += counts->tx.p8x8[i][TX_4X4];

        n8x8_lp += counts->tx.p32x32[i][TX_8X8];
        n8x8_lp += counts->tx.p16x16[i][TX_8X8];
        n8x8_max += counts->tx.p8x8[i][TX_8X8];

        n16x16_max += counts->tx.p16x16[i][TX_16X16];
        n16x16_lp += counts->tx.p32x32[i][TX_16X16];
        n32x32 += counts->tx.p32x32[i][TX_32X32];
      }

      if (n4x4 == 0 && n16x16_lp == 0 && n16x16_max == 0 && n32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (n8x8_max == 0 && n16x16_max == 0 && n8x8_lp == 0 &&
                 n16x16_lp == 0 && n32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (n8x8_lp == 0 && n16x16_lp == 0 && n4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (n32x32 == 0 && n8x8_lp == 0 && n4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  }

  // If segmented AQ is enabled compute the average AQ weighting.
  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
      (cm->seg.update_map || cm->seg.update_data)) {
    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
  }
}
4746 
// Adds one intra-coded block to the luma and chroma prediction mode counters.
// Blocks smaller than 8x8 contribute one luma count per sub-block (in size
// group 0); larger blocks contribute a single count in the group given by
// size_group_lookup. The chroma mode is counted once, indexed by luma mode.
static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
  const PREDICTION_MODE y_mode = mi->mode;
  const PREDICTION_MODE uv_mode = mi->uv_mode;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (bsize >= BLOCK_8X8) {
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  } else {
    const int step_w = num_4x4_blocks_wide_lookup[bsize];
    const int step_h = num_4x4_blocks_high_lookup[bsize];
    int row, col;

    // Visit each distinct sub-block of the 2x2 grid of 4x4 units.
    for (row = 0; row < 2; row += step_h) {
      for (col = 0; col < 2; col += step_w) {
        ++counts->y_mode[0][mi->bmi[row * 2 + col].as_mode];
      }
    }
  }

  ++counts->uv_mode[y_mode][uv_mode];
}
4765 
// Updates cpi->consec_zero_mv for every mode-info unit covered by the block at
// (mi_row, mi_col), clipped to the frame. Only inter blocks whose segment id
// does not exceed CR_SEGMENT_ID_BOOST2 are considered: when both motion vector
// components are smaller than 8 in magnitude the counter is incremented
// (saturating at 255), otherwise it is reset to 0.
static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
  const VP9_COMMON *const cm = &cpi->common;
  const MV mv = mi->mv[0].as_mv;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  const int cols_in_frame = VPXMIN(cm->mi_cols - mi_col, bw);
  const int rows_in_frame = VPXMIN(cm->mi_rows - mi_row, bh);
  const int block_index = mi_row * cm->mi_cols + mi_col;
  // The motion test does not depend on the position inside the block.
  const int is_small_mv = abs(mv.row) < 8 && abs(mv.col) < 8;
  int r, c;

  if (!is_inter_block(mi) || mi->segment_id > CR_SEGMENT_ID_BOOST2) return;

  for (r = 0; r < rows_in_frame; r++) {
    const int row_offset = block_index + r * cm->mi_cols;
    for (c = 0; c < cols_in_frame; c++) {
      const int map_offset = row_offset + c;
      if (!is_small_mv) {
        cpi->consec_zero_mv[map_offset] = 0;
      } else if (cpi->consec_zero_mv[map_offset] < 255) {
        cpi->consec_zero_mv[map_offset]++;
      }
    }
  }
}
4789 
// Encodes and tokenizes the block described by xd->mi[0] at (mi_row, mi_col).
//
// Intra blocks are encoded plane by plane; inter blocks have their reference
// planes set up and their predictors built before the residual is encoded.
// Block sizes below 8x8 are processed as BLOCK_8X8. When output_enabled is
// set, the mode / transform-size statistics are updated as well, and the
// cyclic-refresh and zero-motion bookkeeping hooks run when their conditions
// hold. Nothing is encoded when x->skip_encode evaluates to true.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
  // Sub-8x8 blocks are handled at 8x8 granularity by the calls below.
  const BLOCK_SIZE coding_bsize = VPXMAX(bsize, BLOCK_8X8);

  x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                   cpi->sf.allow_skip_recode;

  if (!(x->skip_recode || cpi->sf.use_nonrd_pick_mode))
    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);

  if (x->skip_encode) return;

  if (is_inter_block(mi)) {
    const int num_refs = 1 + has_second_ref(mi);
    int ref;

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    for (ref = 0; ref < num_refs; ++ref) {
      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
      assert(cfg != NULL);
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           &xd->block_refs[ref]->sf);
    }

    // The luma predictor is rebuilt unless a reusable one is already in
    // place; segment-skip blocks always rebuild it.
    if (seg_skip || !cpi->sf.reuse_inter_pred_sby || !ctx->pred_pixel_ready)
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col, coding_bsize);

    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, coding_bsize);

    vp9_encode_sb(x, coding_bsize);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, coding_bsize);
  } else {
    int plane;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) &&
        need_top_left[mi->uv_mode])
      assert(0);
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    mi->skip = 1;
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      vp9_encode_intra_block_plane(x, coding_bsize, plane, 1);
    if (output_enabled) sum_intra_stats(td->counts, mi);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, coding_bsize);
  }

  assert(!seg_skip || mi->skip);

  // Everything below only updates statistics for the final encode.
  if (!output_enabled) return;

  if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
      !(is_inter_block(mi) && mi->skip)) {
    ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
                    &td->counts->tx)[mi->tx_size];
  } else if (is_inter_block(mi)) {
    mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                         max_txsize_lookup[bsize]);
  } else if (bsize < BLOCK_8X8) {
    // The new intra coding scheme requires no change of transform size
    // for 8x8 and larger blocks; smaller ones are forced to 4x4.
    mi->tx_size = TX_4X4;
  }

  ++td->counts->tx.tx_totals[mi->tx_size];
  ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];

  if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
    vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);

  if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
    update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
}
4878