/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_partition_models.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"

static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);

// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_VP9_HIGHBITDEPTH
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                  BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  return var;
}

#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref,
                                       BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  switch (bd) {
    case 10:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse);
      break;
    case 12:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse);
      break;
    case 8:
    default:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse);
      break;
  }
  return var;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs),
                            num_pels_log2_lookup[bs]);
}

#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  return (unsigned int)ROUND64_POWER_OF_TWO(
      (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd),
      num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

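// Map the per-pixel diff variance of the 64x64 block (against the last frame)
// to a fixed partition size: the lower the variance, the larger the block.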
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, int segment_index) {
  VP9_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];

  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const uint8_t *const map =
      seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;

  // Initialize the segmentation index to 0.
  mi->segment_id = 0;

  // Skip the rest if segmentation (used by the AQ modes) is disabled.
  if (!seg->enabled) return;

  switch (aq_mode) {
    case CYCLIC_REFRESH_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case VARIANCE_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
          cpi->force_update_segmentation ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
        int min_energy;
        int max_energy;
        // Get sub block energy range.
        if (bsize >= BLOCK_32X32) {
          vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
                                   &max_energy);
        } else {
          min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
                                            : vp9_block_energy(cpi, x, bsize);
        }
        mi->segment_id = vp9_vaq_segment_id(min_energy);
      } else {
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      break;
    case LOOKAHEAD_AQ:
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case EQUATOR360_AQ:
      if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation)
        mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
      else
        mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      break;
    case PSNR_AQ: mi->segment_id = segment_index; break;
    default:
      // For the remaining AQ modes (e.g. NO_AQ) leave segment_id at 0.
      break;
  }

  vp9_init_plane_quantizers(cpi, x);
}

// Lighter version of set_offsets that only sets the mode info
// pointers.
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
                                         MACROBLOCK *const x,
                                         MACROBLOCKD *const xd, int mi_row,
                                         int mi_col) {
  const int idx_str = xd->mi_stride * mi_row + mi_col;
  xd->mi = cm->mi_grid_visible + idx_str;
  xd->mi[0] = cm->mi + idx_str;
  x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
}

static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  MvLimits *const mv_limits = &x->mv_limits;

  set_skip_context(xd, mi_row, mi_col);

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);

  // Set up destination pointers.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Set up limit values for MV components.
  // MVs beyond this range do not produce new/different prediction blocks.
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}

static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int block_width =
      VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  const int block_height =
      VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  const int mi_stride = xd->mi_stride;
  MODE_INFO *const src_mi = xd->mi[0];
  int i, j;

  for (j = 0; j < block_height; ++j)
    for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
}

static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
    set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
    xd->mi[0]->sb_type = bsize;
  }
}

typedef struct {
  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 16x16 (with 4x4 avg).
  // Even in high bitdepth, uint32_t is enough for sum_square_error
  // (2^12 * 2^12 * 16 * 16 = 2^32).
  uint32_t sum_square_error;
  int32_t sum_error;
  int log2_count;
  int variance;
} var;

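// Variance tree node types: partition_variance holds the variance of the
// whole block (none) and of its two horizontal and two vertical halves;
// each vNxN level adds four split children one size down, forming a full
// 64x64-to-4x4 hierarchy.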
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    default: {
      v4x4 *vt = (v4x4 *)data;
      assert(bsize == BLOCK_4X4);
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
  }
}

// Set variance values given sum square error, sum error, count.
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
}

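// Compute the block variance from the accumulated sums, scaled up by 256 for
// precision: with n = 2^log2_count samples,
//   variance = 256 * (sum_square_error - sum_error^2 / n) / n.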
static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
                              v->log2_count)) >>
            v->log2_count);
}

static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  fill_variance(a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->log2_count + 1, r);
}

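// Aggregate the four child sums of a node (split[] in raster order:
// 0 top-left, 1 top-right, 2 bottom-left, 3 bottom-right) into the two
// horizontal halves, the two vertical halves, and the whole block (none).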
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
                  &node.part_variances->none);
}

static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}

static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int content_state) {
  if (speed >= 8) {
    if (width <= 640 && height <= 480)
      return (5 * threshold_base) >> 2;
    else if ((content_state == kLowSadLowSumdiff) ||
             (content_state == kHighSadLowSumdiff) ||
             (content_state == kLowVarHighSumdiff))
      return (5 * threshold_base) >> 2;
  } else if (speed == 7) {
    if ((content_state == kLowSadLowSumdiff) ||
        (content_state == kHighSadLowSumdiff) ||
        (content_state == kLowVarHighSumdiff)) {
      return (5 * threshold_base) >> 2;
    }
  }
  return threshold_base;
}

// Set the variance split thresholds for the following block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  const int threshold_multiplier = is_key_frame ? 20 : 1;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);

  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
      if (noise_level == kHigh)
        threshold_base = 3 * threshold_base;
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
      else if (noise_level < kLow)
        threshold_base = (7 * threshold_base) >> 3;
    }
#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
    else
      threshold_base =
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
#else
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
#endif
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
      thresholds[2] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[1] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
    }
    if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
  }
}

void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
                                           int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = frame_is_intra_only(cm);
  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION) {
    return;
  } else {
    set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
    // The thresholds below are not changed locally.
    if (is_key_frame) {
      cpi->vbp_threshold_sad = 0;
      cpi->vbp_threshold_copy = 0;
      cpi->vbp_bsize_min = BLOCK_8X8;
    } else {
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_sad = 10;
      else
        cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
                                     ? (cpi->y_dequant[q][1] << 1)
                                     : 1000;
      cpi->vbp_bsize_min = BLOCK_16X16;
      if (cm->width <= 352 && cm->height <= 288)
        cpi->vbp_threshold_copy = 4000;
      else if (cm->width <= 640 && cm->height <= 360)
        cpi->vbp_threshold_copy = 8000;
      else
        cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
                                      ? (cpi->y_dequant[q][1] << 3)
                                      : 8000;
      if (cpi->rc.high_source_sad ||
          (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
        cpi->vbp_threshold_sad = 0;
        cpi->vbp_threshold_copy = 0;
      }
    }
    cpi->vbp_threshold_minmax = 15 + (q >> 3);
  }
}

// Compute the minmax over the 8x8 subblocks.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int k;
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    int min = 0;
    int max = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
                              d + y8_idx * dp + x8_idx, dp, &min, &max);
      } else {
        vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
                       dp, &min, &max);
      }
#else
      vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                     &min, &max);
#endif
      if ((max - min) > minmax_max) minmax_max = (max - min);
      if ((max - min) < minmax_min) minmax_min = (max - min);
    }
  }
  return (minmax_max - minmax_min);
}

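// Fill the four leaf variances of an 8x8 node from 4x4 averages: each 4x4
// sub-block contributes a single sample, the difference between the source
// and reference 4x4 averages (the reference average defaults to 128 on key
// frames).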
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x4_idx = x8_idx + ((k & 1) << 2);
    int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

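// Same as fill_variance_4x4avg() but one level up: fill the four leaf
// variances of a 16x16 node from 8x8 averages.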
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  int k;
  for (k = 0; k < 4; k++) {
    int x8_idx = x16_idx + ((k & 1) << 3);
    int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame)
          d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      } else {
        s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
        if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      }
#else
      s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
      if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}

// Check if most of the superblock is skin content, and if so, force split to
// 32x32, and set x->sb_is_skin for use in mode selection.
static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
                         int mi_row, int mi_col, int *force_split) {
  VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) return 0;
#endif
  // Avoid checking superblocks on/near boundary and avoid low resolutions.
  // Note the superblock may still pick 64X64 if y_sad is very small
  // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
                   mi_row + 8 < cm->mi_rows)) {
    int num_16x16_skin = 0;
    int num_16x16_nonskin = 0;
    uint8_t *ysignal = x->plane[0].src.buf;
    uint8_t *usignal = x->plane[1].src.buf;
    uint8_t *vsignal = x->plane[2].src.buf;
    int sp = x->plane[0].src.stride;
    int spuv = x->plane[1].src.stride;
    const int block_index = mi_row * cm->mi_cols + mi_col;
    const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
    const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
    const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
    const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
    // Loop through the 16x16 sub-blocks.
    int i, j;
    for (i = 0; i < ymis; i += 2) {
      for (j = 0; j < xmis; j += 2) {
        int bl_index = block_index + i * cm->mi_cols + j;
        int is_skin = cpi->skin_map[bl_index];
        num_16x16_skin += is_skin;
        num_16x16_nonskin += (1 - is_skin);
        if (num_16x16_nonskin > 3) {
          // Exit loop if at least 4 of the 16x16 blocks are not skin.
          i = ymis;
          break;
        }
        ysignal += 16;
        usignal += 8;
        vsignal += 8;
      }
      ysignal += (sp << 4) - 64;
      usignal += (spuv << 3) - 32;
      vsignal += (spuv << 3) - 32;
    }
    if (num_16x16_skin > 12) {
      *force_split = 1;
      return 1;
    }
  }
  return 0;
}

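// x->variance_low[] holds 25 flags per superblock: index 0 for 64x64,
// 1-2 for the 64x32 halves, 3-4 for the 32x64 halves, 5-8 for the 32x32
// quadrants, and 9-24 for the 16x16 blocks.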
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  v64x64 *vt, int64_t thresholds[],
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
  VP9_COMMON *const cm = &cpi->common;
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small set the flag
  // variance_low for the block. The variance threshold can be adjusted, the
  // higher the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      for (i = 0; i < 4; i++) {
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;

        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;

        if ((*this_mi)->sb_type == BLOCK_32X32) {
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
                                        ? ((5 * thresholds[1]) >> 3)
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
            x->variance_low[i + 5] = 1;
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}

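// Recursively walk the partitioning stored in cpi->prev_partition and apply
// it to the mode-info grid of the current superblock.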
static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                     int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;

  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  partition = partition_lookup[bsl][prev_part[start_pos]];
  subsize = get_subsize(bsize, partition);

  if (subsize < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                             int mi_row, int mi_col, int segment_id,
                             int sb_offset) {
  int svc_copy_allowed = 1;
  int frames_since_key_thresh = 1;
  if (cpi->use_svc) {
    // For SVC, don't allow copy if the base spatial layer is a key frame, or
    // if the frame is not a temporal enhancement layer frame.
    int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id,
                                 cpi->svc.number_temporal_layers);
    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
    if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0;
    frames_since_key_thresh = cpi->svc.number_spatial_layers << 1;
  }
  if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed &&
      !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE &&
      cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE &&
      cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) {
    if (cpi->prev_partition != NULL) {
      copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
      cpi->copied_frame_cnt[sb_offset] += 1;
      memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
             sizeof(x->variance_low));
      return 1;
    }
  }

  return 0;
}

static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int mi_row_high, int mi_col_high) {
  VP9_COMMON *const cm = &cpi->common;
  SVC *const svc = &cpi->svc;
  BLOCK_SIZE *prev_part = svc->prev_partition_svc;
  // Variables with _high are for higher resolution.
  int bsize_high = 0;
  int subsize_high = 0;
  const int bsl_high = b_width_log2_lookup[bsize];
  const int bs_high = (1 << bsl_high) >> 2;
  const int has_rows = (mi_row_high + bs_high) < cm->mi_rows;
  const int has_cols = (mi_col_high + bs_high) < cm->mi_cols;

  const int row_boundary_block_scale_factor[BLOCK_SIZES] = {
    13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0
  };
  const int col_boundary_block_scale_factor[BLOCK_SIZES] = {
    13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0
  };
  int start_pos;
  BLOCK_SIZE bsize_low;
  PARTITION_TYPE partition_high;

  if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
  if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
      mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
    return 0;

  // Find corresponding (mi_col/mi_row) block down-scaled by 2x2.
  start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
  bsize_low = prev_part[start_pos];
  // The block size is too big for boundaries. Do variance based partitioning.
  if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1;

  // For reference frames: return 1 (do variance-based partitioning) if the
  // superblock is not low source sad and the lower-resolution bsize is below
  // 32x32.
  if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad &&
      bsize_low < BLOCK_32X32)
    return 1;

  // Scale up block size by 2x2. Force 64x64 for size larger than 32x32.
  if (bsize_low < BLOCK_32X32) {
    bsize_high = bsize_low + 3;
  } else if (bsize_low >= BLOCK_32X32) {
    bsize_high = BLOCK_64X64;
  }
  // Scale up blocks on boundary.
  if (!has_cols && has_rows) {
    bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low];
  } else if (has_cols && !has_rows) {
    bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low];
  } else if (!has_cols && !has_rows) {
    bsize_high = bsize_low;
  }

  partition_high = partition_lookup[bsl_high][bsize_high];
  subsize_high = get_subsize(bsize, partition_high);

  if (subsize_high < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
  } else {
    const int bsl = b_width_log2_lookup[bsize];
    const int bs = (1 << bsl) >> 2;
    switch (partition_high) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high,
                         subsize_high);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high);
        if (subsize_high < BLOCK_64X64)
          set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
                         subsize_high);
        break;
      default:
        assert(partition_high == PARTITION_SPLIT);
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
                                   mi_row_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col, mi_row_high + bs_high, mi_col_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row,
                                   mi_col + (bs >> 1), mi_row_high,
                                   mi_col_high + bs_high))
          return 1;
        if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1),
                                   mi_col + (bs >> 1), mi_row_high + bs_high,
                                   mi_col_high + bs_high))
          return 1;
        break;
    }
  }

  return 0;
}

static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                                 int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;
  int xx, yy;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE:
        prev_part[start_pos] = bsize;
        if (bsize == BLOCK_64X64) {
          for (xx = 0; xx < 8; xx += 4)
            for (yy = 0; yy < 8; yy += 4) {
              if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols))
                prev_part[start_pos + xx * cm->mi_stride + yy] = bsize;
            }
        }
        break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_partition_svc(cpi, subsize, mi_row, mi_col);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
        update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
        update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                         int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  const int bs = (1 << bsl) >> 2;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  mi = cm->mi_grid_visible[start_pos];
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE: prev_part[start_pos] = bsize; break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
        update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
        update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
    }
  }
}

static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id,
                                  int mi_row, int mi_col, int sb_offset) {
  update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col);
  cpi->prev_segment_id[sb_offset] = segment_id;
  memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
         sizeof(x->variance_low));
  // Reset the counter for copy partitioning.
  cpi->copied_frame_cnt[sb_offset] = 0;
}

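// Set the per-plane color_sensitivity flags: a chroma plane is flagged when
// its SAD against the prediction exceeds a quarter of the luma SAD.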
static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
                         unsigned int y_sad, int is_key_frame) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;

  if (is_key_frame) return;

  // For speed >= 8, avoid the chroma check if y_sad is above threshold.
  if (cpi->oxcf.speed >= 8) {
    if (y_sad > cpi->vbp_thresholds[1] &&
        (!cpi->noise_estimate.enabled ||
         vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
      return;
  }

  for (i = 1; i <= 2; ++i) {
    unsigned int uv_sad = UINT_MAX;
    struct macroblock_plane *p = &x->plane[i];
    struct macroblockd_plane *pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    if (bs != BLOCK_INVALID)
      uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
                                   pd->dst.stride);

    // TODO(marpan): Investigate if we should lower this threshold if
    // superblock is detected as skin.
    x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
  }
}

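// Classify the superblock's source content (x->content_state_sb) from the
// 64x64 SAD and the sum of differences against the last source frame, and
// track per-superblock low-SAD persistence in cpi->content_state_sb_fd.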
static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
                               int sb_offset) {
  unsigned int tmp_sse;
  uint64_t tmp_sad;
  unsigned int tmp_variance;
  const BLOCK_SIZE bsize = BLOCK_64X64;
  uint8_t *src_y = cpi->Source->y_buffer;
  int src_ystride = cpi->Source->y_stride;
  uint8_t *last_src_y = cpi->Last_Source->y_buffer;
  int last_src_ystride = cpi->Last_Source->y_stride;
  uint64_t avg_source_sad_threshold = 10000;
  uint64_t avg_source_sad_threshold2 = 12000;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) return 0;
#endif
  src_y += shift;
  last_src_y += shift;
  tmp_sad =
      cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
  tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
                                   last_src_ystride, &tmp_sse);
  // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
  if (tmp_sad < avg_source_sad_threshold)
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
                                                          : kLowSadHighSumdiff;
  else
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
                                                          : kHighSadHighSumdiff;

  // Detect large lighting change.
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
      cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) &&
      (tmp_sse - tmp_variance) > 10000)
    x->content_state_sb = kLowVarHighSumdiff;
  else if (tmp_sad > (avg_source_sad_threshold << 1))
    x->content_state_sb = kVeryHighSad;

  if (cpi->content_state_sb_fd != NULL) {
    if (tmp_sad < avg_source_sad_threshold2) {
      // Cap the increment to 255.
      if (cpi->content_state_sb_fd[sb_offset] < 255)
        cpi->content_state_sb_fd[sb_offset]++;
    } else {
      cpi->content_state_sb_fd[sb_offset] = 0;
    }
  }
  if (tmp_sad == 0) x->zero_temp_sad_source = 1;
  return tmp_sad;
}

// This function chooses partitioning based on the variance between the source
// and the reconstructed last frame, where variance is computed for
// down-sampled inputs.
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  int i, j, k, m;
  v64x64 vt;
  v16x16 *vt2 = NULL;
  int force_split[21];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int avg_16x16[4];
  int maxvar_16x16[4];
  int minvar_16x16[4];
  int64_t threshold_4x4avg;
  NOISE_LEVEL noise_level = kLow;
  int content_state = 0;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  int compute_minmax_variance = 1;
  unsigned int y_sad = UINT_MAX;
  BLOCK_SIZE bsize = BLOCK_64X64;
  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
  int pixels_wide = 64, pixels_high = 64;
  int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
  int scene_change_detected =
      cpi->rc.high_source_sad ||
      (cpi->use_svc && cpi->svc.high_source_sad_superframe);

  // For the variance computation under SVC mode, we treat the frame as key if
  // the reference (base layer frame) is a key frame (i.e., is_key_frame == 1).
  int is_key_frame =
      (frame_is_intra_only(cm) ||
       (is_one_pass_cbr_svc(cpi) &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  // Always use 4x4 partition for key frame.
  const int use_4x4_partition = frame_is_intra_only(cm);
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[16];
  int segment_id;
  int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);

  // For SVC: check if LAST frame is NULL or if the resolution of LAST is
  // different than the current frame resolution, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in some cases where enhancement spatial layers are
  // enabled dynamically in the stream and the only reference is the spatial
  // reference (GOLDEN).
  if (cpi->use_svc) {
    const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME);
    if (ref == NULL || ref->y_crop_height != cm->height ||
        ref->y_crop_width != cm->width)
      is_key_frame = 1;
  }

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0);
  segment_id = xd->mi[0]->segment_id;

  if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
    compute_minmax_variance = 0;

  memset(x->variance_low, 0, sizeof(x->variance_low));

  if (cpi->sf.use_source_sad && !is_key_frame) {
    int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
    content_state = x->content_state_sb;
    x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
                              content_state == kLowSadHighSumdiff)
                                 ? 1
                                 : 0;
    x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
    if (cpi->content_state_sb_fd != NULL)
      x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];

    // For SVC on the top spatial layer: use/scale the partition from
    // the lower spatial resolution if svc_use_lowres_part is enabled.
    if (cpi->sf.svc_use_lowres_part &&
        cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
        cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) {
      if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1,
                                  mi_col >> 1, mi_row, mi_col)) {
        if (cpi->sf.copy_partition_flag) {
          update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
        }
        return 0;
      }
    }
    // If source_sad is low, copy the partition without computing the y_sad.
    if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
        !scene_change_detected &&
        copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
      x->sb_use_mv_part = 1;
      if (cpi->sf.svc_use_lowres_part &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
        update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
      return 0;
    }
  }

  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id)) {
    int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, content_state);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
  }

  // For non-key frames at low resolution, disable the 4x4 average when
  // speed >= 8.
  threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;
1349
1350 if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
1351 if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
1352
1353 s = x->plane[0].src.buf;
1354 sp = x->plane[0].src.stride;
1355
1356 // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
1357 // 5-20 for the 16x16 blocks.
1358 force_split[0] = scene_change_detected;
1359
1360 if (!is_key_frame) {
1361 // In the case of spatial/temporal scalable coding, the assumption here is
1362 // that the temporal reference frame will always be of type LAST_FRAME.
1363 // TODO(marpan): If that assumption is broken, we need to revisit this code.
1364 MODE_INFO *mi = xd->mi[0];
1365 YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
1366
1367 const YV12_BUFFER_CONFIG *yv12_g = NULL;
1368 unsigned int y_sad_g, y_sad_thr, y_sad_last;
1369 bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
1370 (mi_row + 4 < cm->mi_rows);
1371
1372 assert(yv12 != NULL);
1373
1374 if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
1375 cpi->svc.use_gf_temporal_ref_current_layer) {
1376 // For now, GOLDEN will not be used for non-zero spatial layers, since
1377 // it may not be a temporal reference.
1378 yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
1379 }
1380
1381 // Only compute y_sad_g (sad for golden reference) for speed < 8.
1382 if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
1383 (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
1384 vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1385 &cm->frame_refs[GOLDEN_FRAME - 1].sf);
1386 y_sad_g = cpi->fn_ptr[bsize].sdf(
1387 x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
1388 xd->plane[0].pre[0].stride);
1389 } else {
1390 y_sad_g = UINT_MAX;
1391 }
1392
1393 if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
1394 cpi->rc.is_src_frame_alt_ref) {
1395 yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
1396 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
1397 &cm->frame_refs[ALTREF_FRAME - 1].sf);
1398 mi->ref_frame[0] = ALTREF_FRAME;
1399 y_sad_g = UINT_MAX;
1400 } else {
1401 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
1402 &cm->frame_refs[LAST_FRAME - 1].sf);
1403 mi->ref_frame[0] = LAST_FRAME;
1404 }
1405 mi->ref_frame[1] = NONE;
1406 mi->sb_type = BLOCK_64X64;
1407 mi->mv[0].as_int = 0;
1408 mi->interp_filter = BILINEAR;
1409
1410 if (cpi->oxcf.speed >= 8 && !low_res &&
1411 x->content_state_sb != kVeryHighSad) {
1412 y_sad = cpi->fn_ptr[bsize].sdf(
1413 x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
1414 xd->plane[0].pre[0].stride);
1415 } else {
1416 const MV dummy_mv = { 0, 0 };
1417 y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
1418 &dummy_mv);
1419 x->sb_use_mv_part = 1;
1420 x->sb_mvcol_part = mi->mv[0].as_mv.col;
1421 x->sb_mvrow_part = mi->mv[0].as_mv.row;
1422 if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
1423 cpi->svc.spatial_layer_id == 0 &&
1424 cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
1425 cm->width > 640 && cm->height > 480) {
1426 // Disable split below 16x16 block size when scroll motion is detected.
1427 // TODO(marpan/jianj): Improve this condition: issue is that search
1428 // range is hard-coded/limited in vp9_int_pro_motion_estimation() so
1429 // scroll motion may not be detected here.
1430 if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
1431 y_sad < 100000) {
1432 compute_minmax_variance = 0;
1433 thresholds[2] = INT64_MAX;
1434 }
1435 }
1436 }
1437
1438 y_sad_last = y_sad;
1439 // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
1440 // are close if short_circuit_low_temp_var is on.
1441 y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
1442 if (y_sad_g < y_sad_thr) {
1443 vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1444 &cm->frame_refs[GOLDEN_FRAME - 1].sf);
1445 mi->ref_frame[0] = GOLDEN_FRAME;
1446 mi->mv[0].as_int = 0;
1447 y_sad = y_sad_g;
1448 ref_frame_partition = GOLDEN_FRAME;
1449 } else {
1450 x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
1451 ref_frame_partition = LAST_FRAME;
1452 }
1453
1454 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
1455 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
1456
1457 if (cpi->use_skin_detection)
1458 x->sb_is_skin =
1459 skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
1460
1461 d = xd->plane[0].dst.buf;
1462 dp = xd->plane[0].dst.stride;
1463
1464 // If the y_sad is very small, take 64x64 as the partition and exit.
1465 // Don't check on boosted segments for now, as 64x64 is suppressed there.
1466 if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
1467 const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
1468 const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
1469 if (mi_col + block_width / 2 < cm->mi_cols &&
1470 mi_row + block_height / 2 < cm->mi_rows) {
1471 set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
1472 x->variance_low[0] = 1;
1473 chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1474 if (cpi->sf.svc_use_lowres_part &&
1475 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
1476 update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
1477 if (cpi->sf.copy_partition_flag) {
1478 update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
1479 }
1480 return 0;
1481 }
1482 }
1483
1484 // If y_sad_last is small enough, copy the superblock's partition from the
1485 // last frame to the current frame, but only if the last frame is not a
1486 // keyframe. Reset the copy every cpi->max_copied_frame frames to refresh the partition.
1487 // TODO(jianj) : tune the threshold.
1488 if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
1489 copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
1490 chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1491 if (cpi->sf.svc_use_lowres_part &&
1492 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
1493 update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
1494 return 0;
1495 }
1496 } else {
1497 d = VP9_VAR_OFFS;
1498 dp = 0;
1499 #if CONFIG_VP9_HIGHBITDEPTH
1500 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1501 switch (xd->bd) {
1502 case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
1503 case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
1504 case 8:
1505 default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
1506 }
1507 }
1508 #endif // CONFIG_VP9_HIGHBITDEPTH
1509 }
1510
1511 if (low_res && threshold_4x4avg < INT64_MAX)
1512 CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
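// vt2 holds one v16x16 node per 16x16 block (16 in a 64x64 superblock);
// it is only needed when low-res inter frames fall back to the 4x4
// down-sampled variance path below.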
1513 // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
1514 // for splits.
1515 for (i = 0; i < 4; i++) {
1516 const int x32_idx = ((i & 1) << 5);
1517 const int y32_idx = ((i >> 1) << 5);
1518 const int i2 = i << 2;
1519 force_split[i + 1] = 0;
1520 avg_16x16[i] = 0;
1521 maxvar_16x16[i] = 0;
1522 minvar_16x16[i] = INT_MAX;
1523 for (j = 0; j < 4; j++) {
1524 const int x16_idx = x32_idx + ((j & 1) << 4);
1525 const int y16_idx = y32_idx + ((j >> 1) << 4);
1526 const int split_index = 5 + i2 + j;
1527 v16x16 *vst = &vt.split[i].split[j];
1528 force_split[split_index] = 0;
1529 variance4x4downsample[i2 + j] = 0;
1530 if (!is_key_frame) {
1531 fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
1532 #if CONFIG_VP9_HIGHBITDEPTH
1533 xd->cur_buf->flags,
1534 #endif
1535 pixels_wide, pixels_high, is_key_frame);
1536 fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
1537 get_variance(&vt.split[i].split[j].part_variances.none);
1538 avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
1539 if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
1540 minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
1541 if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
1542 maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
1543 if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
1544 // 16x16 variance is above the threshold for split, so force split to 8x8
1545 // for this 16x16 block (this also forces splits for upper levels).
1546 force_split[split_index] = 1;
1547 force_split[i + 1] = 1;
1548 force_split[0] = 1;
1549 } else if (compute_minmax_variance &&
1550 vt.split[i].split[j].part_variances.none.variance >
1551 thresholds[1] &&
1552 !cyclic_refresh_segment_id_boosted(segment_id)) {
1553 // The 16x16 variance is moderately high (above thresholds[1] based on
1554 // the average); compute the minmax over the 8x8 sub-blocks and, if that
1555 // spread is above threshold, force a split to 8x8 for this 16x16 block.
1556 int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
1557 #if CONFIG_VP9_HIGHBITDEPTH
1558 xd->cur_buf->flags,
1559 #endif
1560 pixels_wide, pixels_high);
1561 int thresh_minmax = (int)cpi->vbp_threshold_minmax;
1562 if (x->content_state_sb == kVeryHighSad)
1563 thresh_minmax = thresh_minmax << 1;
1564 if (minmax > thresh_minmax) {
1565 force_split[split_index] = 1;
1566 force_split[i + 1] = 1;
1567 force_split[0] = 1;
1568 }
1569 }
1570 }
1571 if (is_key_frame ||
1572 (low_res && vt.split[i].split[j].part_variances.none.variance >
1573 threshold_4x4avg)) {
1574 force_split[split_index] = 0;
1575 // Go down to 4x4 down-sampling for variance.
1576 variance4x4downsample[i2 + j] = 1;
1577 for (k = 0; k < 4; k++) {
1578 int x8_idx = x16_idx + ((k & 1) << 3);
1579 int y8_idx = y16_idx + ((k >> 1) << 3);
1580 v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
1581 fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
1582 #if CONFIG_VP9_HIGHBITDEPTH
1583 xd->cur_buf->flags,
1584 #endif
1585 pixels_wide, pixels_high, is_key_frame);
1586 }
1587 }
1588 }
1589 }
1590 if (cpi->noise_estimate.enabled)
1591 noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
1592 // Fill the rest of the variance tree by summing split partition values.
1593 avg_32x32 = 0;
1594 for (i = 0; i < 4; i++) {
1595 const int i2 = i << 2;
1596 for (j = 0; j < 4; j++) {
1597 if (variance4x4downsample[i2 + j] == 1) {
1598 v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
1599 for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
1600 fill_variance_tree(vtemp, BLOCK_16X16);
1601 // If variance of this 16x16 block is above the threshold, force block
1602 // to split. This also forces a split on the upper levels.
1603 get_variance(&vtemp->part_variances.none);
1604 if (vtemp->part_variances.none.variance > thresholds[2]) {
1605 force_split[5 + i2 + j] = 1;
1606 force_split[i + 1] = 1;
1607 force_split[0] = 1;
1608 }
1609 }
1610 }
1611 fill_variance_tree(&vt.split[i], BLOCK_32X32);
1612 // Force this 32x32 block to split if its variance is above the
1613 // threshold, or if it is above half the threshold while also exceeding
1614 // twice the mean variance of its 16x16 sub-blocks (avg_16x16[i] holds
1615 // their sum). This also forces a split on the upper (64x64) level.
1616 if (!force_split[i + 1]) {
1617 get_variance(&vt.split[i].part_variances.none);
1618 var_32x32 = vt.split[i].part_variances.none.variance;
1619 max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
1620 min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
1621 if (vt.split[i].part_variances.none.variance > thresholds[1] ||
1622 (!is_key_frame &&
1623 vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
1624 vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
1625 force_split[i + 1] = 1;
1626 force_split[0] = 1;
1627 } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
1628 (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
1629 maxvar_16x16[i] > thresholds[1]) {
1630 force_split[i + 1] = 1;
1631 force_split[0] = 1;
1632 }
1633 avg_32x32 += var_32x32;
1634 }
1635 }
1636 if (!force_split[0]) {
1637 fill_variance_tree(&vt, BLOCK_64X64);
1638 get_variance(&vt.part_variances.none);
1639 // If the variance of this 64x64 block is above 9/8 of the mean variance
1640 // of its sub-32x32 blocks ((9 * avg_32x32) >> 5, since avg_32x32 is their
1641 // sum), force this block to split. Only checked for noise level >= medium.
1642 if (!is_key_frame && noise_level >= kMedium &&
1643 vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
1644 force_split[0] = 1;
1645 // Else, if the maximum 32x32 variance minus the minimum 32x32 variance
1646 // in the 64x64 block exceeds a threshold and the maximum 32x32 variance
1647 // is above a minimum threshold, force the 64x64 block to split.
1648 // Only check this for low noise.
1649 else if (!is_key_frame && noise_level < kMedium &&
1650 (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
1651 max_var_32x32 > thresholds[0] >> 1)
1652 force_split[0] = 1;
1653 }
1654
1655 // Now go through the entire structure, splitting every block size until
1656 // we reach one whose variance is below the threshold.
1657 if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
1658 !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
1659 thresholds[0], BLOCK_16X16, force_split[0])) {
1660 for (i = 0; i < 4; ++i) {
1661 const int x32_idx = ((i & 1) << 2);
1662 const int y32_idx = ((i >> 1) << 2);
1663 const int i2 = i << 2;
1664 if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
1665 (mi_row + y32_idx), (mi_col + x32_idx),
1666 thresholds[1], BLOCK_16X16,
1667 force_split[i + 1])) {
1668 for (j = 0; j < 4; ++j) {
1669 const int x16_idx = ((j & 1) << 1);
1670 const int y16_idx = ((j >> 1) << 1);
1671 // For inter frames: if variance4x4downsample[] == 1 for this 16x16
1672 // block, then the variance is based on 4x4 down-sampling, so use vt2
1673 // in set_vt_partitioning(); otherwise use vt.
1674 v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
1675 ? &vt2[i2 + j]
1676 : &vt.split[i].split[j];
1677 if (!set_vt_partitioning(
1678 cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
1679 mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
1680 force_split[5 + i2 + j])) {
1681 for (k = 0; k < 4; ++k) {
1682 const int x8_idx = (k & 1);
1683 const int y8_idx = (k >> 1);
1684 if (use_4x4_partition) {
1685 if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
1686 BLOCK_8X8,
1687 mi_row + y32_idx + y16_idx + y8_idx,
1688 mi_col + x32_idx + x16_idx + x8_idx,
1689 thresholds[3], BLOCK_8X8, 0)) {
1690 set_block_size(
1691 cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
1692 (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
1693 }
1694 } else {
1695 set_block_size(
1696 cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
1697 (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
1698 }
1699 }
1700 }
1701 }
1702 }
1703 }
1704 }
1705
1706 if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) {
1707 update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
1708 }
1709
1710 if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part &&
1711 cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
1712 update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
1713
1714 if (cpi->sf.short_circuit_low_temp_var) {
1715 set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
1716 mi_col, mi_row);
1717 }
1718
1719 chroma_check(cpi, x, bsize, y_sad, is_key_frame);
1720 if (vt2) vpx_free(vt2);
1721 return 0;
1722 }
1723
1724 static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
1725 int mi_row, int mi_col, BLOCK_SIZE bsize,
1726 int output_enabled) {
1727 int i, x_idx, y;
1728 VP9_COMMON *const cm = &cpi->common;
1729 RD_COUNTS *const rdc = &td->rd_counts;
1730 MACROBLOCK *const x = &td->mb;
1731 MACROBLOCKD *const xd = &x->e_mbd;
1732 struct macroblock_plane *const p = x->plane;
1733 struct macroblockd_plane *const pd = xd->plane;
1734 MODE_INFO *mi = &ctx->mic;
1735 MODE_INFO *const xdmi = xd->mi[0];
1736 MODE_INFO *mi_addr = xd->mi[0];
1737 const struct segmentation *const seg = &cm->seg;
1738 const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
1739 const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
1740 const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
1741 const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
1742 MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
1743 int w, h;
1744
1745 const int mis = cm->mi_stride;
1746 const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1747 const int mi_height = num_8x8_blocks_high_lookup[bsize];
1748 int max_plane;
1749
1750 assert(mi->sb_type == bsize);
1751
1752 *mi_addr = *mi;
1753 *x->mbmi_ext = ctx->mbmi_ext;
1754
1755 // If segmentation is in use:
1756 if (seg->enabled) {
1757 // For in-frame complexity AQ, copy the segment id from the segment map.
1758 if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
1759 const uint8_t *const map =
1760 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1761 mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1762 }
1763 // Else, for cyclic refresh mode, update the segment map, set the
1764 // segment id, and then update the quantizer.
1765 if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
1766 vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
1767 ctx->rate, ctx->dist, x->skip, p);
1768 }
1769 }
1770
1771 max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
1772 for (i = 0; i < max_plane; ++i) {
1773 p[i].coeff = ctx->coeff_pbuf[i][1];
1774 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
1775 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
1776 p[i].eobs = ctx->eobs_pbuf[i][1];
1777 }
1778
1779 for (i = max_plane; i < MAX_MB_PLANE; ++i) {
1780 p[i].coeff = ctx->coeff_pbuf[i][2];
1781 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1782 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1783 p[i].eobs = ctx->eobs_pbuf[i][2];
1784 }
1785
1786 // Restore the coding context of the MB to the one that was in place
1787 // when the mode was picked for it, skipping cells outside the frame.
1788 for (y = 0; y < mi_height; y++)
1789 for (x_idx = 0; x_idx < mi_width; x_idx++)
1790 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
1791 (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
1792 xd->mi[x_idx + y * mis] = mi_addr;
1793 }
1794
1795 if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);
1796
1797 if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
1798 xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
1799 xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
1800 }
1801
1802 x->skip = ctx->skip;
1803 memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
1804 sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
1805
1806 if (!output_enabled) return;
1807
1808 #if CONFIG_INTERNAL_STATS
1809 if (frame_is_intra_only(cm)) {
1810 static const int kf_mode_index[] = {
1811 THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/,
1812 THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/,
1813 THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
1814 THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
1815 THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/,
1816 };
1817 ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
1818 } else {
1819 // Record how often each mode is chosen as best.
1820 ++cpi->mode_chosen_counts[ctx->best_mode_index];
1821 }
1822 #endif
1823 if (!frame_is_intra_only(cm)) {
1824 if (is_inter_block(xdmi)) {
1825 vp9_update_mv_count(td);
1826
1827 if (cm->interp_filter == SWITCHABLE) {
1828 const int ctx = get_pred_context_switchable_interp(xd);
1829 ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
1830 }
1831 }
1832
1833 rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
1834 rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
1835 rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
1836
1837 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
1838 rdc->filter_diff[i] += ctx->best_filter_diff[i];
1839 }
1840
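// Store this block's reference frames and MVs into the frame-level MV
// buffer so they can serve as temporal MV candidates for the next frame.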
1841 for (h = 0; h < y_mis; ++h) {
1842 MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
1843 for (w = 0; w < x_mis; ++w) {
1844 MV_REF *const mv = frame_mv + w;
1845 mv->ref_frame[0] = mi->ref_frame[0];
1846 mv->ref_frame[1] = mi->ref_frame[1];
1847 mv->mv[0].as_int = mi->mv[0].as_int;
1848 mv->mv[1].as_int = mi->mv[1].as_int;
1849 }
1850 }
1851 }
1852
1853 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
1854 int mi_row, int mi_col) {
1855 uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
1856 const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
1857 int i;
1858
1859 // Set current frame pointer.
1860 x->e_mbd.cur_buf = src;
1861
1862 for (i = 0; i < MAX_MB_PLANE; i++)
1863 setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
1864 NULL, x->e_mbd.plane[i].subsampling_x,
1865 x->e_mbd.plane[i].subsampling_y);
1866 }
1867
1868 static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
1869 RD_COST *rd_cost, BLOCK_SIZE bsize) {
1870 MACROBLOCKD *const xd = &x->e_mbd;
1871 MODE_INFO *const mi = xd->mi[0];
1872 INTERP_FILTER filter_ref;
1873
1874 filter_ref = get_pred_context_switchable_interp(xd);
1875 if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;
1876
1877 mi->sb_type = bsize;
1878 mi->mode = ZEROMV;
1879 mi->tx_size =
1880 VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
1881 mi->skip = 1;
1882 mi->uv_mode = DC_PRED;
1883 mi->ref_frame[0] = LAST_FRAME;
1884 mi->ref_frame[1] = NONE;
1885 mi->mv[0].as_int = 0;
1886 mi->interp_filter = filter_ref;
1887
1888 xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
1889 x->skip = 1;
1890
1891 vp9_rd_cost_init(rd_cost);
1892 }
1893
1894 static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
1895 int mi_row, int mi_col, BLOCK_SIZE bsize,
1896 AQ_MODE aq_mode) {
1897 int segment_qindex;
1898 VP9_COMMON *const cm = &cpi->common;
1899 const uint8_t *const map =
1900 cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1901
1902 vp9_init_plane_quantizers(cpi, x);
1903 vpx_clear_system_state();
1904 segment_qindex =
1905 vp9_get_qindex(&cm->seg, x->e_mbd.mi[0]->segment_id, cm->base_qindex);
1906
1907 if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) {
1908 if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
1909 return;
1910 }
1911
1912 if (aq_mode == CYCLIC_REFRESH_AQ) {
1913 // If segment is boosted, use rdmult for that segment.
1914 if (cyclic_refresh_segment_id_boosted(
1915 get_segment_id(cm, map, bsize, mi_row, mi_col)))
1916 x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
1917 return;
1918 }
1919
1920 x->rdmult = vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
1921 }
1922
1923 static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
1924 MACROBLOCK *const x, int mi_row, int mi_col,
1925 RD_COST *rd_cost, BLOCK_SIZE bsize,
1926 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
1927 VP9_COMMON *const cm = &cpi->common;
1928 TileInfo *const tile_info = &tile_data->tile_info;
1929 MACROBLOCKD *const xd = &x->e_mbd;
1930 MODE_INFO *mi;
1931 struct macroblock_plane *const p = x->plane;
1932 struct macroblockd_plane *const pd = xd->plane;
1933 const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
1934 int i, orig_rdmult;
1935
1936 vpx_clear_system_state();
1937
1938 // Use the lower precision, but faster, 32x32 fdct for mode selection.
1939 x->use_lp32x32fdct = 1;
1940
1941 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1942 mi = xd->mi[0];
1943 mi->sb_type = bsize;
1944
1945 for (i = 0; i < MAX_MB_PLANE; ++i) {
1946 p[i].coeff = ctx->coeff_pbuf[i][0];
1947 p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
1948 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
1949 p[i].eobs = ctx->eobs_pbuf[i][0];
1950 }
1951 ctx->is_coded = 0;
1952 ctx->skippable = 0;
1953 ctx->pred_pixel_ready = 0;
1954 x->skip_recode = 0;
1955
1956 // Set to zero to make sure we do not use the previously encoded frame's stats.
1957 mi->skip = 0;
1958
1959 #if CONFIG_VP9_HIGHBITDEPTH
1960 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1961 x->source_variance = vp9_high_get_sby_perpixel_variance(
1962 cpi, &x->plane[0].src, bsize, xd->bd);
1963 } else {
1964 x->source_variance =
1965 vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
1966 }
1967 #else
1968 x->source_variance =
1969 vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
1970 #endif // CONFIG_VP9_HIGHBITDEPTH
1971
1972 // Save rdmult before it might be changed, so it can be restored later.
1973 orig_rdmult = x->rdmult;
1974
1975 if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
1976 double logvar = vp9_log_block_var(cpi, x, bsize);
1977 // Check block complexity as part of the decision on using pixel- or
1978 // transform-domain distortion in rd tests.
1979 x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
1980 (logvar >= cpi->sf.tx_domain_thresh);
1981
1982 // Check block complexity as part of the decision on using quantized
1983 // coefficient optimization inside the rd loop.
1984 x->block_qcoeff_opt =
1985 cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
1986 } else {
1987 x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
1988 x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
1989 }
1990
1991 set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
1992 set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode);
1993
1994 // Find best coding mode & reconstruct the MB so it is available
1995 // as a predictor for MBs that follow in the SB
1996 if (frame_is_intra_only(cm)) {
1997 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
1998 } else {
1999 if (bsize >= BLOCK_8X8) {
2000 if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
2001 vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
2002 ctx, best_rd);
2003 else
2004 vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
2005 bsize, ctx, best_rd);
2006 } else {
2007 vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
2008 bsize, ctx, best_rd);
2009 }
2010 }
2011
2012 // Examine the resulting rate and for AQ mode 2 make a segment choice.
2013 if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
2014 (bsize >= BLOCK_16X16) &&
2015 (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
2016 (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
2017 vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
2018 }
2019
2020 // TODO(jingning) The rate-distortion optimization flow needs to be
2021 // refactored to provide proper exit/return handling.
2022 if (rd_cost->rate == INT_MAX)
2023 rd_cost->rdcost = INT64_MAX;
2024 else
2025 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
2026
2027 x->rdmult = orig_rdmult;
2028
2029 ctx->rate = rd_cost->rate;
2030 ctx->dist = rd_cost->dist;
2031 }
2032
2033 static void update_stats(VP9_COMMON *cm, ThreadData *td) {
2034 const MACROBLOCK *x = &td->mb;
2035 const MACROBLOCKD *const xd = &x->e_mbd;
2036 const MODE_INFO *const mi = xd->mi[0];
2037 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2038 const BLOCK_SIZE bsize = mi->sb_type;
2039
2040 if (!frame_is_intra_only(cm)) {
2041 FRAME_COUNTS *const counts = td->counts;
2042 const int inter_block = is_inter_block(mi);
2043 const int seg_ref_active =
2044 segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
2045 if (!seg_ref_active) {
2046 counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
2047 // If the segment reference feature is enabled, only a single reference
2048 // frame is allowed for the segment, so exclude it from the reference
2049 // frame counts used to work out probabilities.
2050 if (inter_block) {
2051 const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
2052 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2053 counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
2054 [has_second_ref(mi)]++;
2055
2056 if (has_second_ref(mi)) {
2057 const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
2058 const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
2059 const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1];
2060 counts->comp_ref[ctx][bit]++;
2061 } else {
2062 counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
2063 [ref0 != LAST_FRAME]++;
2064 if (ref0 != LAST_FRAME)
2065 counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
2066 [ref0 != GOLDEN_FRAME]++;
2067 }
2068 }
2069 }
2070 if (inter_block &&
2071 !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
2072 const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
2073 if (bsize >= BLOCK_8X8) {
2074 const PREDICTION_MODE mode = mi->mode;
2075 ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
2076 } else {
2077 const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
2078 const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
2079 int idx, idy;
2080 for (idy = 0; idy < 2; idy += num_4x4_h) {
2081 for (idx = 0; idx < 2; idx += num_4x4_w) {
2082 const int j = idy * 2 + idx;
2083 const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
2084 ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
2085 }
2086 }
2087 }
2088 }
2089 }
2090 }
2091
2092 static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
2093 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
2094 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
2095 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
2096 BLOCK_SIZE bsize) {
2097 MACROBLOCKD *const xd = &x->e_mbd;
2098 int p;
2099 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
2100 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
2101 int mi_width = num_8x8_blocks_wide_lookup[bsize];
2102 int mi_height = num_8x8_blocks_high_lookup[bsize];
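// Entropy contexts are stored in 4x4 units per plane, so mi (8x8)
// positions are scaled by 2 and then shifted by the plane's subsampling.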
2103 for (p = 0; p < MAX_MB_PLANE; p++) {
2104 memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
2105 a + num_4x4_blocks_wide * p,
2106 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
2107 xd->plane[p].subsampling_x);
2108 memcpy(xd->left_context[p] +
2109 ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
2110 l + num_4x4_blocks_high * p,
2111 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
2112 xd->plane[p].subsampling_y);
2113 }
2114 memcpy(xd->above_seg_context + mi_col, sa,
2115 sizeof(*xd->above_seg_context) * mi_width);
2116 memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
2117 sizeof(xd->left_seg_context[0]) * mi_height);
2118 }
2119
2120 static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
2121 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
2122 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
2123 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
2124 BLOCK_SIZE bsize) {
2125 const MACROBLOCKD *const xd = &x->e_mbd;
2126 int p;
2127 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
2128 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
2129 int mi_width = num_8x8_blocks_wide_lookup[bsize];
2130 int mi_height = num_8x8_blocks_high_lookup[bsize];
2131
2132 // Buffer the above/left context information of the block being searched.
2133 for (p = 0; p < MAX_MB_PLANE; ++p) {
2134 memcpy(a + num_4x4_blocks_wide * p,
2135 xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
2136 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
2137 xd->plane[p].subsampling_x);
2138 memcpy(l + num_4x4_blocks_high * p,
2139 xd->left_context[p] +
2140 ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
2141 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
2142 xd->plane[p].subsampling_y);
2143 }
2144 memcpy(sa, xd->above_seg_context + mi_col,
2145 sizeof(*xd->above_seg_context) * mi_width);
2146 memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
2147 sizeof(xd->left_seg_context[0]) * mi_height);
2148 }
2149
2150 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
2151 TOKENEXTRA **tp, int mi_row, int mi_col,
2152 int output_enabled, BLOCK_SIZE bsize,
2153 PICK_MODE_CONTEXT *ctx) {
2154 MACROBLOCK *const x = &td->mb;
2155 set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
2156
2157 if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ)
2158 x->rdmult = x->cb_rdmult;
2159
2160 update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
2161 encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
2162
2163 if (output_enabled) {
2164 update_stats(&cpi->common, td);
2165
2166 (*tp)->token = EOSB_TOKEN;
2167 (*tp)++;
2168 }
2169 }
2170
2171 static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
2172 TOKENEXTRA **tp, int mi_row, int mi_col,
2173 int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2174 VP9_COMMON *const cm = &cpi->common;
2175 MACROBLOCK *const x = &td->mb;
2176 MACROBLOCKD *const xd = &x->e_mbd;
2177
2178 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
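// hbs is half the block width in 8x8 mi units (e.g. 4 for BLOCK_64X64),
// used to step to the second half of a horizontal/vertical partition.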
2179 int ctx;
2180 PARTITION_TYPE partition;
2181 BLOCK_SIZE subsize = bsize;
2182
2183 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
2184
2185 if (bsize >= BLOCK_8X8) {
2186 ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
2187 subsize = get_subsize(bsize, pc_tree->partitioning);
2188 } else {
2189 ctx = 0;
2190 subsize = BLOCK_4X4;
2191 }
2192
2193 partition = partition_lookup[bsl][subsize];
2194 if (output_enabled && bsize != BLOCK_4X4)
2195 td->counts->partition[ctx][partition]++;
2196
2197 switch (partition) {
2198 case PARTITION_NONE:
2199 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
2200 &pc_tree->none);
2201 break;
2202 case PARTITION_VERT:
2203 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
2204 &pc_tree->vertical[0]);
2205 if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
2206 encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
2207 subsize, &pc_tree->vertical[1]);
2208 }
2209 break;
2210 case PARTITION_HORZ:
2211 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
2212 &pc_tree->horizontal[0]);
2213 if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
2214 encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
2215 subsize, &pc_tree->horizontal[1]);
2216 }
2217 break;
2218 default:
2219 assert(partition == PARTITION_SPLIT);
2220 if (bsize == BLOCK_8X8) {
2221 encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
2222 pc_tree->leaf_split[0]);
2223 } else {
2224 encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2225 pc_tree->split[0]);
2226 encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
2227 subsize, pc_tree->split[1]);
2228 encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
2229 subsize, pc_tree->split[2]);
2230 encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
2231 subsize, pc_tree->split[3]);
2232 }
2233 break;
2234 }
2235
2236 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
2237 update_partition_context(xd, mi_row, mi_col, subsize, bsize);
2238 }
2239
2240 // Check to see if the given partition size is allowed for a specified
2241 // number of 8x8 block rows and columns remaining in the image.
2242 // If not, return the largest allowed partition size.
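// Note: square sizes are spaced 3 apart in the BLOCK_SIZE enum
// (e.g. BLOCK_8X8 -> BLOCK_16X16), which is why the search below
// steps by bsize -= 3.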
2243 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
2244 int cols_left, int *bh, int *bw) {
2245 if (rows_left <= 0 || cols_left <= 0) {
2246 return VPXMIN(bsize, BLOCK_8X8);
2247 } else {
2248 for (; bsize > 0; bsize -= 3) {
2249 *bh = num_8x8_blocks_high_lookup[bsize];
2250 *bw = num_8x8_blocks_wide_lookup[bsize];
2251 if ((*bh <= rows_left) && (*bw <= cols_left)) {
2252 break;
2253 }
2254 }
2255 }
2256 return bsize;
2257 }
2258
2259 static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
2260 int bw_in, int row8x8_remaining,
2261 int col8x8_remaining, BLOCK_SIZE bsize,
2262 MODE_INFO **mi_8x8) {
2263 int bh = bh_in;
2264 int r, c;
2265 for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
2266 int bw = bw_in;
2267 for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
2268 const int index = r * mis + c;
2269 mi_8x8[index] = mi + index;
2270 mi_8x8[index]->sb_type = find_partition_size(
2271 bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
2272 }
2273 }
2274 }
2275
2276 // This function attempts to set all mode info entries in a given SB64
2277 // to the same block partition size.
2278 // However, at the bottom and right borders of the image the requested
2279 // size may not be allowed, in which case this code attempts to choose
2280 // the largest allowable partition size.
2281 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
2282 MODE_INFO **mi_8x8, int mi_row, int mi_col,
2283 BLOCK_SIZE bsize) {
2284 VP9_COMMON *const cm = &cpi->common;
2285 const int mis = cm->mi_stride;
2286 const int row8x8_remaining = tile->mi_row_end - mi_row;
2287 const int col8x8_remaining = tile->mi_col_end - mi_col;
2288 int block_row, block_col;
2289 MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
2290 int bh = num_8x8_blocks_high_lookup[bsize];
2291 int bw = num_8x8_blocks_wide_lookup[bsize];
2292
2293 assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
2294
2295 // Apply the requested partition size to the SB64 if it is all "in image"
2296 if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
2297 (row8x8_remaining >= MI_BLOCK_SIZE)) {
2298 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
2299 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
2300 int index = block_row * mis + block_col;
2301 mi_8x8[index] = mi_upper_left + index;
2302 mi_8x8[index]->sb_type = bsize;
2303 }
2304 }
2305 } else {
2306 // Else this is a partial SB64.
2307 set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
2308 col8x8_remaining, bsize, mi_8x8);
2309 }
2310 }
2311
2312 static const struct {
2313 int row;
2314 int col;
2315 } coord_lookup[16] = {
2316 // 32x32 index = 0
2317 { 0, 0 },
2318 { 0, 2 },
2319 { 2, 0 },
2320 { 2, 2 },
2321 // 32x32 index = 1
2322 { 0, 4 },
2323 { 0, 6 },
2324 { 2, 4 },
2325 { 2, 6 },
2326 // 32x32 index = 2
2327 { 4, 0 },
2328 { 4, 2 },
2329 { 6, 0 },
2330 { 6, 2 },
2331 // 32x32 index = 3
2332 { 4, 4 },
2333 { 4, 6 },
2334 { 6, 4 },
2335 { 6, 6 },
2336 };
2337
2338 static void set_source_var_based_partition(VP9_COMP *cpi,
2339 const TileInfo *const tile,
2340 MACROBLOCK *const x,
2341 MODE_INFO **mi_8x8, int mi_row,
2342 int mi_col) {
2343 VP9_COMMON *const cm = &cpi->common;
2344 const int mis = cm->mi_stride;
2345 const int row8x8_remaining = tile->mi_row_end - mi_row;
2346 const int col8x8_remaining = tile->mi_col_end - mi_col;
2347 MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
2348
2349 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
2350
2351 assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
2352
2353 // In-image SB64
2354 if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
2355 (row8x8_remaining >= MI_BLOCK_SIZE)) {
2356 int i, j;
2357 int index;
2358 diff d32[4];
2359 const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
2360 int is_larger_better = 0;
2361 int use32x32 = 0;
2362 unsigned int thr = cpi->source_var_thresh;
2363
2364 memset(d32, 0, 4 * sizeof(diff));
2365
2366 for (i = 0; i < 4; i++) {
2367 diff *d16[4];
2368
2369 for (j = 0; j < 4; j++) {
2370 int b_mi_row = coord_lookup[i * 4 + j].row;
2371 int b_mi_col = coord_lookup[i * 4 + j].col;
2372 int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;
2373
2374 d16[j] = cpi->source_diff_var + offset + boffset;
2375
2376 index = b_mi_row * mis + b_mi_col;
2377 mi_8x8[index] = mi_upper_left + index;
2378 mi_8x8[index]->sb_type = BLOCK_16X16;
2379
2380 // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
2381 // size to further improve quality.
2382 }
2383
2384 is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
2385 (d16[2]->var < thr) && (d16[3]->var < thr);
2386
2387 // Use 32x32 partition
2388 if (is_larger_better) {
2389 use32x32 += 1;
2390
2391 for (j = 0; j < 4; j++) {
2392 d32[i].sse += d16[j]->sse;
2393 d32[i].sum += d16[j]->sum;
2394 }
2395
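// Variance of the combined 32x32 block: sse - sum^2 / 1024, where
// 1024 = 32 * 32 pixels (hence the >> 10).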
2396 d32[i].var =
2397 (unsigned int)(d32[i].sse -
2398 (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
2399 10));
2400
2401 index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
2402 mi_8x8[index] = mi_upper_left + index;
2403 mi_8x8[index]->sb_type = BLOCK_32X32;
2404 }
2405 }
2406
2407 if (use32x32 == 4) {
2408 thr <<= 1;
2409 is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
2410 (d32[2].var < thr) && (d32[3].var < thr);
2411
2412 // Use 64x64 partition
2413 if (is_larger_better) {
2414 mi_8x8[0] = mi_upper_left;
2415 mi_8x8[0]->sb_type = BLOCK_64X64;
2416 }
2417 }
2418 } else { // partial in-image SB64
2419 int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
2420 int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
2421 set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
2422 col8x8_remaining, BLOCK_16X16, mi_8x8);
2423 }
2424 }
2425
2426 static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
2427 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
2428 int bsize) {
2429 VP9_COMMON *const cm = &cpi->common;
2430 MACROBLOCK *const x = &td->mb;
2431 MACROBLOCKD *const xd = &x->e_mbd;
2432 MODE_INFO *const mi = xd->mi[0];
2433 struct macroblock_plane *const p = x->plane;
2434 const struct segmentation *const seg = &cm->seg;
2435 const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
2436 const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
2437 const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
2438 const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
2439
2440 *(xd->mi[0]) = ctx->mic;
2441 *(x->mbmi_ext) = ctx->mbmi_ext;
2442
2443 if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
2444 // For in-frame complexity AQ or variance AQ, copy segment_id from
2445 // segmentation_map.
2446 if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
2447 const uint8_t *const map =
2448 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
2449 mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
2450 } else {
2451 // Setting segmentation map for cyclic_refresh.
2452 vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
2453 ctx->rate, ctx->dist, x->skip, p);
2454 }
2455 vp9_init_plane_quantizers(cpi, x);
2456 }
2457
2458 if (is_inter_block(mi)) {
2459 vp9_update_mv_count(td);
2460 if (cm->interp_filter == SWITCHABLE) {
2461 const int pred_ctx = get_pred_context_switchable_interp(xd);
2462 ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
2463 }
2464
2465 if (mi->sb_type < BLOCK_8X8) {
2466 mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
2467 mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
2468 }
2469 }
2470
2471 if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
2472 (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
2473 cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
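// Save the block's MVs into the frame-level MV buffer so they can be used
// for temporal MV prediction (or base-layer MV reuse in SVC) when coding
// subsequent frames.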
2474 MV_REF *const frame_mvs =
2475 cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
2476 int w, h;
2477
2478 for (h = 0; h < y_mis; ++h) {
2479 MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
2480 for (w = 0; w < x_mis; ++w) {
2481 MV_REF *const mv = frame_mv + w;
2482 mv->ref_frame[0] = mi->ref_frame[0];
2483 mv->ref_frame[1] = mi->ref_frame[1];
2484 mv->mv[0].as_int = mi->mv[0].as_int;
2485 mv->mv[1].as_int = mi->mv[1].as_int;
2486 }
2487 }
2488 }
2489
2490 x->skip = ctx->skip;
2491 x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0];
2492 }
2493
2494 static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
2495 const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
2496 int mi_col, int output_enabled, BLOCK_SIZE bsize,
2497 PICK_MODE_CONTEXT *ctx) {
2498 MACROBLOCK *const x = &td->mb;
2499 set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
2500 update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
2501
2502 encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
2503 update_stats(&cpi->common, td);
2504
2505 (*tp)->token = EOSB_TOKEN;
2506 (*tp)++;
2507 }
2508
2509 static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
2510 const TileInfo *const tile, TOKENEXTRA **tp,
2511 int mi_row, int mi_col, int output_enabled,
2512 BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2513 VP9_COMMON *const cm = &cpi->common;
2514 MACROBLOCK *const x = &td->mb;
2515 MACROBLOCKD *const xd = &x->e_mbd;
2516
2517 const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
2518 int ctx;
2519 PARTITION_TYPE partition;
2520 BLOCK_SIZE subsize;
2521
2522 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
2523
2524 if (bsize >= BLOCK_8X8) {
2525 const int idx_str = xd->mi_stride * mi_row + mi_col;
2526 MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
2527 ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
2528 subsize = mi_8x8[0]->sb_type;
2529 } else {
2530 ctx = 0;
2531 subsize = BLOCK_4X4;
2532 }
2533
2534 partition = partition_lookup[bsl][subsize];
2535 if (output_enabled && bsize != BLOCK_4X4)
2536 td->counts->partition[ctx][partition]++;
2537
2538 switch (partition) {
2539 case PARTITION_NONE:
2540 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2541 &pc_tree->none);
2542 break;
2543 case PARTITION_VERT:
2544 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2545 &pc_tree->vertical[0]);
2546 if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
2547 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
2548 subsize, &pc_tree->vertical[1]);
2549 }
2550 break;
2551 case PARTITION_HORZ:
2552 encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2553 &pc_tree->horizontal[0]);
2554 if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
2555 encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
2556 subsize, &pc_tree->horizontal[1]);
2557 }
2558 break;
2559 default:
2560 assert(partition == PARTITION_SPLIT);
2561 subsize = get_subsize(bsize, PARTITION_SPLIT);
2562 encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2563 pc_tree->split[0]);
2564 encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
2565 subsize, pc_tree->split[1]);
2566 encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
2567 subsize, pc_tree->split[2]);
2568 encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
2569 output_enabled, subsize, pc_tree->split[3]);
2570 break;
2571 }
2572
2573 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
2574 update_partition_context(xd, mi_row, mi_col, subsize, bsize);
2575 }
2576
2577 static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
2578 TileDataEnc *tile_data, MODE_INFO **mi_8x8,
2579 TOKENEXTRA **tp, int mi_row, int mi_col,
2580 BLOCK_SIZE bsize, int *rate, int64_t *dist,
2581 int do_recon, PC_TREE *pc_tree) {
2582 VP9_COMMON *const cm = &cpi->common;
2583 TileInfo *const tile_info = &tile_data->tile_info;
2584 MACROBLOCK *const x = &td->mb;
2585 MACROBLOCKD *const xd = &x->e_mbd;
2586 const int mis = cm->mi_stride;
2587 const int bsl = b_width_log2_lookup[bsize];
2588 const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
2589 const int bss = (1 << bsl) / 4;
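// bss is half the block width in 8x8 mi units, i.e. the stride between
// split sub-blocks in the mi_8x8 grid (e.g. 4 for BLOCK_64X64).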
2590 int i, pl;
2591 PARTITION_TYPE partition = PARTITION_NONE;
2592 BLOCK_SIZE subsize;
2593 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2594 PARTITION_CONTEXT sl[8], sa[8];
2595 RD_COST last_part_rdc, none_rdc, chosen_rdc;
2596 BLOCK_SIZE sub_subsize = BLOCK_4X4;
2597 int splits_below = 0;
2598 BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
2599 int do_partition_search = 1;
2600 PICK_MODE_CONTEXT *ctx = &pc_tree->none;
2601
2602 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
2603
2604 assert(num_4x4_blocks_wide_lookup[bsize] ==
2605 num_4x4_blocks_high_lookup[bsize]);
2606
2607 vp9_rd_cost_reset(&last_part_rdc);
2608 vp9_rd_cost_reset(&none_rdc);
2609 vp9_rd_cost_reset(&chosen_rdc);
2610
2611 partition = partition_lookup[bsl][bs_type];
2612 subsize = get_subsize(bsize, partition);
2613
2614 pc_tree->partitioning = partition;
2615 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2616
2617 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
2618 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2619 x->mb_energy = vp9_block_energy(cpi, x, bsize);
2620 }
2621
2622 if (do_partition_search &&
2623 cpi->sf.partition_search_type == SEARCH_PARTITION &&
2624 cpi->sf.adjust_partitioning_from_last_frame) {
2625 // Check if any of the sub blocks are further split.
2626 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
2627 sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
2628 splits_below = 1;
2629 for (i = 0; i < 4; i++) {
2630 int jj = i >> 1, ii = i & 0x01;
2631 MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
2632 if (this_mi && this_mi->sb_type >= sub_subsize) {
2633 splits_below = 0;
2634 }
2635 }
2636 }
2637
2638 // If the partition is not PARTITION_NONE, also try PARTITION_NONE,
2639 // unless each of the 4 sub-blocks is itself split even further.
2640 if (partition != PARTITION_NONE && !splits_below &&
2641 mi_row + (mi_step >> 1) < cm->mi_rows &&
2642 mi_col + (mi_step >> 1) < cm->mi_cols) {
2643 pc_tree->partitioning = PARTITION_NONE;
2644 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
2645 INT64_MAX);
2646
2647 pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2648
2649 if (none_rdc.rate < INT_MAX) {
2650 none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
2651 none_rdc.rdcost =
2652 RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
2653 }
2654
2655 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2656 mi_8x8[0]->sb_type = bs_type;
2657 pc_tree->partitioning = partition;
2658 }
2659 }
2660
2661 switch (partition) {
2662 case PARTITION_NONE:
2663 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
2664 ctx, INT64_MAX);
2665 break;
2666 case PARTITION_HORZ:
2667 pc_tree->horizontal[0].skip_ref_frame_mask = 0;
2668 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2669 subsize, &pc_tree->horizontal[0], INT64_MAX);
2670 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2671 mi_row + (mi_step >> 1) < cm->mi_rows) {
2672 RD_COST tmp_rdc;
2673 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
2674 vp9_rd_cost_init(&tmp_rdc);
2675 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
2676 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
2677 pc_tree->horizontal[1].skip_ref_frame_mask = 0;
2678 rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
2679 &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
2680 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2681 vp9_rd_cost_reset(&last_part_rdc);
2682 break;
2683 }
2684 last_part_rdc.rate += tmp_rdc.rate;
2685 last_part_rdc.dist += tmp_rdc.dist;
2686 last_part_rdc.rdcost += tmp_rdc.rdcost;
2687 }
2688 break;
2689 case PARTITION_VERT:
2690 pc_tree->vertical[0].skip_ref_frame_mask = 0;
2691 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2692 subsize, &pc_tree->vertical[0], INT64_MAX);
2693 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2694 mi_col + (mi_step >> 1) < cm->mi_cols) {
2695 RD_COST tmp_rdc;
2696 PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
2697 vp9_rd_cost_init(&tmp_rdc);
2698 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
2699 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
2700 pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
2701 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
2702 &tmp_rdc, subsize,
2703 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
2704 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2705 vp9_rd_cost_reset(&last_part_rdc);
2706 break;
2707 }
2708 last_part_rdc.rate += tmp_rdc.rate;
2709 last_part_rdc.dist += tmp_rdc.dist;
2710 last_part_rdc.rdcost += tmp_rdc.rdcost;
2711 }
2712 break;
2713 default:
2714 assert(partition == PARTITION_SPLIT);
2715 if (bsize == BLOCK_8X8) {
2716 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2717 subsize, pc_tree->leaf_split[0], INT64_MAX);
2718 break;
2719 }
2720 last_part_rdc.rate = 0;
2721 last_part_rdc.dist = 0;
2722 last_part_rdc.rdcost = 0;
2723 for (i = 0; i < 4; i++) {
2724 int x_idx = (i & 1) * (mi_step >> 1);
2725 int y_idx = (i >> 1) * (mi_step >> 1);
2726 int jj = i >> 1, ii = i & 0x01;
2727 RD_COST tmp_rdc;
2728 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
2729 continue;
2730
2731 vp9_rd_cost_init(&tmp_rdc);
2732 rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
2733 tp, mi_row + y_idx, mi_col + x_idx, subsize,
2734 &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
2735 pc_tree->split[i]);
2736 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2737 vp9_rd_cost_reset(&last_part_rdc);
2738 break;
2739 }
2740 last_part_rdc.rate += tmp_rdc.rate;
2741 last_part_rdc.dist += tmp_rdc.dist;
2742 }
2743 break;
2744 }
2745
2746 pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2747 if (last_part_rdc.rate < INT_MAX) {
2748 last_part_rdc.rate += cpi->partition_cost[pl][partition];
2749 last_part_rdc.rdcost =
2750 RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
2751 }
2752
2753 if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
2754 cpi->sf.partition_search_type == SEARCH_PARTITION &&
2755 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
2756 (mi_row + mi_step < cm->mi_rows ||
2757 mi_row + (mi_step >> 1) == cm->mi_rows) &&
2758 (mi_col + mi_step < cm->mi_cols ||
2759 mi_col + (mi_step >> 1) == cm->mi_cols)) {
2760 BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
2761 chosen_rdc.rate = 0;
2762 chosen_rdc.dist = 0;
2763 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2764 pc_tree->partitioning = PARTITION_SPLIT;
2765
2766 // Split partition.
2767 for (i = 0; i < 4; i++) {
2768 int x_idx = (i & 1) * (mi_step >> 1);
2769 int y_idx = (i >> 1) * (mi_step >> 1);
2770 RD_COST tmp_rdc;
2771 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2772 PARTITION_CONTEXT sl[8], sa[8];
2773
2774 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
2775 continue;
2776
2777 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2778 pc_tree->split[i]->partitioning = PARTITION_NONE;
2779 rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2780 &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
2781 INT64_MAX);
2782
2783 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2784
2785 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2786 vp9_rd_cost_reset(&chosen_rdc);
2787 break;
2788 }
2789
2790 chosen_rdc.rate += tmp_rdc.rate;
2791 chosen_rdc.dist += tmp_rdc.dist;
2792
2793 if (i != 3)
2794 encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
2795 split_subsize, pc_tree->split[i]);
2796
2797 pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
2798 split_subsize);
2799 chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
2800 }
2801 pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2802 if (chosen_rdc.rate < INT_MAX) {
2803 chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
2804 chosen_rdc.rdcost =
2805 RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
2806 }
2807 }
2808
2809 // If last_part is better, set the partitioning to the last-frame partition.
2810 if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
2811 mi_8x8[0]->sb_type = bsize;
2812 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
2813 chosen_rdc = last_part_rdc;
2814 }
2815 // If none was better, set the partitioning to PARTITION_NONE.
2816 if (none_rdc.rdcost < chosen_rdc.rdcost) {
2817 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2818 chosen_rdc = none_rdc;
2819 }
2820
2821 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2822
2823 // We must have chosen a partitioning and encoding by this point, or
2824 // we'll fail later on; there are no other opportunities for success.
2825 if (bsize == BLOCK_64X64)
2826 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2827
2828 if (do_recon) {
2829 int output_enabled = (bsize == BLOCK_64X64);
2830 encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
2831 pc_tree);
2832 }
2833
2834 *rate = chosen_rdc.rate;
2835 *dist = chosen_rdc.dist;
2836 }
2837
2838 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
2839 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
2840 BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16,
2841 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
2842 };
2843
2844 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
2845 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
2846 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
2847 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
2848 };
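// These tables relax the observed min down and max up, e.g.
// min_partition_size[BLOCK_32X32] == BLOCK_16X16 and
// max_partition_size[BLOCK_32X32] == BLOCK_64X64, widening the search
// range around the sizes that neighboring blocks used.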
2849
2850 // Look at all the mode_info entries for blocks that are part of this
2851 // partition and find the min and max values for sb_type.
2852 // At the moment this is designed to work on a 64x64 SB but could be
2853 // adjusted to use a size parameter.
2854 //
2855 // The min and max are assumed to have been initialized prior to calling
2856 // this function, so repeat calls can accumulate a min and max over more than one SB64.
2857 static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
2858 BLOCK_SIZE *min_block_size,
2859 BLOCK_SIZE *max_block_size,
2860 int bs_hist[BLOCK_SIZES]) {
2861 int sb_width_in_blocks = MI_BLOCK_SIZE;
2862 int sb_height_in_blocks = MI_BLOCK_SIZE;
2863 int i, j;
2864 int index = 0;
2865
2866 // Check the sb_type for each block that belongs to this region.
2867 for (i = 0; i < sb_height_in_blocks; ++i) {
2868 for (j = 0; j < sb_width_in_blocks; ++j) {
2869 MODE_INFO *mi = mi_8x8[index + j];
2870 BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
2871 bs_hist[sb_type]++;
2872 *min_block_size = VPXMIN(*min_block_size, sb_type);
2873 *max_block_size = VPXMAX(*max_block_size, sb_type);
2874 }
2875 index += xd->mi_stride;
2876 }
2877 }
2878
2879 // Next square block size less than or equal to the current block size.
2880 static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
2881 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
2882 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
2883 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
2884 };
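// Illustrative sketch (not compiled): how the lookup tables above cooperate.
// If BLOCK_8X8 is the only size observed in the neighborhood, the relaxed
// auto-partition adjustment below widens the search window by one step in
// each direction:
#if 0
BLOCK_SIZE min_size = min_partition_size[BLOCK_8X8]; /* -> BLOCK_4X4 */
BLOCK_SIZE max_size = max_partition_size[BLOCK_8X8]; /* -> BLOCK_16X16 */
/* If only square partitions are searched, keep one legal square size: */
if (next_square_size[max_size] < min_size) min_size = next_square_size[max_size];
#endif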
2885
2886 // Look at neighboring blocks and set a min and max partition size based on
2887 // what they chose.
2888 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
2889 MACROBLOCKD *const xd, int mi_row,
2890 int mi_col, BLOCK_SIZE *min_block_size,
2891 BLOCK_SIZE *max_block_size) {
2892 VP9_COMMON *const cm = &cpi->common;
2893 MODE_INFO **mi = xd->mi;
2894 const int left_in_image = !!xd->left_mi;
2895 const int above_in_image = !!xd->above_mi;
2896 const int row8x8_remaining = tile->mi_row_end - mi_row;
2897 const int col8x8_remaining = tile->mi_col_end - mi_col;
2898 int bh, bw;
2899 BLOCK_SIZE min_size = BLOCK_4X4;
2900 BLOCK_SIZE max_size = BLOCK_64X64;
2901 int bs_hist[BLOCK_SIZES] = { 0 };
2902
2903 // Trap case where we do not have a prediction.
2904 if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
2905 // Invert the defaults (min = largest, max = smallest) so the first
// observation overrides both.
2906 min_size = BLOCK_64X64;
2907 max_size = BLOCK_4X4;
2908
2909 // NOTE: each call to get_sb_partition_size_range() uses the previously
2910 // passed-in values for min and max as a starting point.
2911 // Find the min and max partition sizes used in the previous frame at this location.
2912 if (cm->frame_type != KEY_FRAME) {
2913 MODE_INFO **prev_mi =
2914 &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
2915 get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
2916 }
2917 // Find the min and max partition sizes used in the left SB64
2918 if (left_in_image) {
2919 MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
2920 get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
2921 bs_hist);
2922 }
2923 // Find the min and max partition sizes used in the above SB64.
2924 if (above_in_image) {
2925 MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
2926 get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
2927 bs_hist);
2928 }
2929
2930 // Adjust observed min and max for "relaxed" auto partition case.
2931 if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
2932 min_size = min_partition_size[min_size];
2933 max_size = max_partition_size[max_size];
2934 }
2935 }
2936
2937 // Check border cases where max and min from neighbors may not be legal.
2938 max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
2939 &bh, &bw);
2940 // Test for blocks at the edge of the active image.
2941 // This may be the actual edge of the image or where there are formatting
2942 // bars.
2943 if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
2944 min_size = BLOCK_4X4;
2945 } else {
2946 min_size =
2947 VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
2948 }
2949
2950 // When use_square_partition_only is true, make sure at least one square
2951 // partition is allowed by selecting the next smaller square size as
2952 // *min_block_size.
2953 if (cpi->sf.use_square_partition_only &&
2954 next_square_size[max_size] < min_size) {
2955 min_size = next_square_size[max_size];
2956 }
2957
2958 *min_block_size = min_size;
2959 *max_block_size = max_size;
2960 }
2961
2962 // TODO(jingning) refactor functions setting partition search range
2963 static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
2964 int mi_col, BLOCK_SIZE bsize,
2965 BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
2966 int mi_width = num_8x8_blocks_wide_lookup[bsize];
2967 int mi_height = num_8x8_blocks_high_lookup[bsize];
2968 int idx, idy;
2969
2970 MODE_INFO *mi;
2971 const int idx_str = cm->mi_stride * mi_row + mi_col;
2972 MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
2973 BLOCK_SIZE bs, min_size, max_size;
2974
2975 min_size = BLOCK_64X64;
2976 max_size = BLOCK_4X4;
2977
2978 if (prev_mi) {
2979 for (idy = 0; idy < mi_height; ++idy) {
2980 for (idx = 0; idx < mi_width; ++idx) {
2981 mi = prev_mi[idy * cm->mi_stride + idx];
2982 bs = mi ? mi->sb_type : bsize;
2983 min_size = VPXMIN(min_size, bs);
2984 max_size = VPXMAX(max_size, bs);
2985 }
2986 }
2987 }
2988
2989 if (xd->left_mi) {
2990 for (idy = 0; idy < mi_height; ++idy) {
2991 mi = xd->mi[idy * cm->mi_stride - 1];
2992 bs = mi ? mi->sb_type : bsize;
2993 min_size = VPXMIN(min_size, bs);
2994 max_size = VPXMAX(max_size, bs);
2995 }
2996 }
2997
2998 if (xd->above_mi) {
2999 for (idx = 0; idx < mi_width; ++idx) {
3000 mi = xd->mi[idx - cm->mi_stride];
3001 bs = mi ? mi->sb_type : bsize;
3002 min_size = VPXMIN(min_size, bs);
3003 max_size = VPXMAX(max_size, bs);
3004 }
3005 }
3006
3007 if (min_size == max_size) {
3008 min_size = min_partition_size[min_size];
3009 max_size = max_partition_size[max_size];
3010 }
3011
3012 *min_bs = min_size;
3013 *max_bs = max_size;
3014 }
3015
3016 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
3017 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
3018 }
3019
3020 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
3021 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
3022 }
3023
3024 #if CONFIG_FP_MB_STATS
3025 const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
3026 1, 2, 2, 2, 4, 4 };
3027 const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
3028 2, 1, 2, 4, 2, 4 };
3029 const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
3030 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120
3031 };
3032 const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
3033 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120
3034 };
3035 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
3036 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
3037 };
3038
3039 typedef enum {
3040 MV_ZERO = 0,
3041 MV_LEFT = 1,
3042 MV_UP = 2,
3043 MV_RIGHT = 3,
3044 MV_DOWN = 4,
3045 MV_INVALID
3046 } MOTION_DIRECTION;
3047
3048 static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
3049 if (fp_byte & FPMB_MOTION_ZERO_MASK) {
3050 return MV_ZERO;
3051 } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
3052 return MV_LEFT;
3053 } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
3054 return MV_RIGHT;
3055 } else if (fp_byte & FPMB_MOTION_UP_MASK) {
3056 return MV_UP;
3057 } else {
3058 return MV_DOWN;
3059 }
3060 }
3061
3062 static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
3063 MOTION_DIRECTION that_mv) {
3064 if (this_mv == that_mv) {
3065 return 0;
3066 } else {
3067 return abs(this_mv - that_mv) == 2 ? 2 : 1;
3068 }
3069 }
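// Worked example of the weighting above: MV_LEFT (1) vs. MV_RIGHT (3) or
// MV_UP (2) vs. MV_DOWN (4) differ by 2 and count as inconsistency 2;
// adjacent directions such as MV_LEFT vs. MV_UP count as 1; identical
// directions count as 0.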
3070 #endif
3071
3072 // Calculate prediction based on the given input features and neural net config.
3073 // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
3074 // layer.
3075 static void nn_predict(const float *features, const NN_CONFIG *nn_config,
3076 float *output) {
3077 int num_input_nodes = nn_config->num_inputs;
3078 int buf_index = 0;
3079 float buf[2][NN_MAX_NODES_PER_LAYER];
3080 const float *input_nodes = features;
3081
3082 // Propagate hidden layers.
3083 const int num_layers = nn_config->num_hidden_layers;
3084 int layer, node, i;
3085 assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
3086 for (layer = 0; layer < num_layers; ++layer) {
3087 const float *weights = nn_config->weights[layer];
3088 const float *bias = nn_config->bias[layer];
3089 float *output_nodes = buf[buf_index];
3090 const int num_output_nodes = nn_config->num_hidden_nodes[layer];
3091 assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
3092 for (node = 0; node < num_output_nodes; ++node) {
3093 float val = 0.0f;
3094 for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
3095 val += bias[node];
3096 // ReLU as activation function.
3097 val = VPXMAX(val, 0.0f);
3098 output_nodes[node] = val;
3099 weights += num_input_nodes;
3100 }
3101 num_input_nodes = num_output_nodes;
3102 input_nodes = output_nodes;
3103 buf_index = 1 - buf_index;
3104 }
3105
3106 // Final output layer.
3107 {
3108 const float *weights = nn_config->weights[num_layers];
3109 for (node = 0; node < nn_config->num_outputs; ++node) {
3110 const float *bias = nn_config->bias[num_layers];
3111 float val = 0.0f;
3112 for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
3113 output[node] = val + bias[node];
3114 weights += num_input_nodes;
3115 }
3116 }
3117 }
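// Illustrative sketch (not compiled): a minimal forward pass through
// nn_predict() with two inputs, one hidden layer of two ReLU nodes, and one
// output. The weights and the NN_CONFIG initializer layout below are
// hypothetical, inferred from how the function indexes the config, and are
// meant only to show the data flow.
#if 0
static void nn_predict_demo(void) {
  static const float hidden_w[] = { 0.5f, -0.25f,  /* node 0 */
                                    1.0f, 0.75f }; /* node 1 */
  static const float out_w[] = { 1.0f, -1.0f };
  static const float hidden_b[] = { 0.0f, 0.1f };
  static const float out_b[] = { 0.2f };
  static const NN_CONFIG demo_config = {
    2,                   /* num_inputs */
    1,                   /* num_outputs */
    1,                   /* num_hidden_layers */
    { 2 },               /* num_hidden_nodes */
    { hidden_w, out_w }, /* weights: hidden layer(s), then output */
    { hidden_b, out_b }, /* biases: hidden layer(s), then output */
  };
  const float features[2] = { 0.3f, -0.6f };
  float score;
  nn_predict(features, &demo_config, &score);
  /* Hidden node 0: relu(0.5 * 0.3 + -0.25 * -0.6 + 0.0) = 0.30
   * Hidden node 1: relu(1.0 * 0.3 + 0.75 * -0.6 + 0.1)  = 0.00
   * Output:        1.0 * 0.30 + -1.0 * 0.00 + 0.2       = 0.50 */
}
#endif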
3118
3119 #define FEATURES 7
3120 // Machine-learning based partition search early termination.
3121 // Return 1 to skip split and rect partitions.
3122 static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
3123 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
3124 BLOCK_SIZE bsize) {
3125 const int mag_mv =
3126 abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
3127 const int left_in_image = !!xd->left_mi;
3128 const int above_in_image = !!xd->above_mi;
3129 MODE_INFO **prev_mi =
3130 &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
3131 int above_par = 0; // above_partitioning
3132 int left_par = 0; // left_partitioning
3133 int last_par = 0; // last_partitioning
3134 int offset = 0;
3135 int i;
3136 BLOCK_SIZE context_size;
3137 const NN_CONFIG *nn_config = NULL;
3138 const float *mean, *sd, *linear_weights;
3139 float nn_score, linear_score;
3140 float features[FEATURES];
3141
3142 assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
3143 vpx_clear_system_state();
3144
3145 switch (bsize) {
3146 case BLOCK_64X64:
3147 offset = 0;
3148 nn_config = &vp9_partition_nnconfig_64x64;
3149 break;
3150 case BLOCK_32X32:
3151 offset = 8;
3152 nn_config = &vp9_partition_nnconfig_32x32;
3153 break;
3154 case BLOCK_16X16:
3155 offset = 16;
3156 nn_config = &vp9_partition_nnconfig_16x16;
3157 break;
3158 default: assert(0 && "Unexpected block size."); return 0;
3159 }
3160
3161 if (above_in_image) {
3162 context_size = xd->above_mi->sb_type;
3163 if (context_size < bsize)
3164 above_par = 2;
3165 else if (context_size == bsize)
3166 above_par = 1;
3167 }
3168
3169 if (left_in_image) {
3170 context_size = xd->left_mi->sb_type;
3171 if (context_size < bsize)
3172 left_par = 2;
3173 else if (context_size == bsize)
3174 left_par = 1;
3175 }
3176
3177 if (prev_mi) {
3178 context_size = prev_mi[0]->sb_type;
3179 if (context_size < bsize)
3180 last_par = 2;
3181 else if (context_size == bsize)
3182 last_par = 1;
3183 }
3184
3185 mean = &vp9_partition_feature_mean[offset];
3186 sd = &vp9_partition_feature_std[offset];
3187 features[0] = ((float)ctx->rate - mean[0]) / sd[0];
3188 features[1] = ((float)ctx->dist - mean[1]) / sd[1];
3189 features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
3190 features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3];
3191 features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4];
3192 features[5] = ((float)cm->base_qindex - mean[5]) * sd[5];
3193 features[6] = ((float)last_par - mean[6]) * sd[6];
3194
3195 // Predict using linear model.
3196 linear_weights = &vp9_partition_linear_weights[offset];
3197 linear_score = linear_weights[FEATURES];
3198 for (i = 0; i < FEATURES; ++i)
3199 linear_score += linear_weights[i] * features[i];
3200 if (linear_score > 0.1f) return 0;
3201
3202 // Predict using neural net model.
3203 nn_predict(features, nn_config, &nn_score);
3204
3205 if (linear_score < -0.0f && nn_score < 0.1f) return 1;
3206 if (nn_score < -0.0f && linear_score < 0.1f) return 1;
3207 return 0;
3208 }
3209 #undef FEATURES
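// A reading of the gating above: the linear model acts as a cheap first
// filter (a clearly positive linear score keeps the full search), and
// pruning happens only when the linear and neural-net scores both indicate
// little benefit from further split/rect searches, with at least one of
// them negative.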
3210
3211 #define FEATURES 4
3212 // ML-based partition search breakout.
3213 static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
3214 const MACROBLOCK *const x,
3215 const RD_COST *const rd_cost) {
3216 DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
3217 const VP9_COMMON *const cm = &cpi->common;
3218 float features[FEATURES];
3219 const float *linear_weights = NULL; // Linear model weights.
3220 float linear_score = 0.0f;
3221 const int qindex = cm->base_qindex;
3222 const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
3223 const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720;
3224 const int resolution_ctx = is_720p_or_larger ? 1 : 0;
3225
3226 switch (bsize) {
3227 case BLOCK_64X64:
3228 linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx];
3229 break;
3230 case BLOCK_32X32:
3231 linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx];
3232 break;
3233 case BLOCK_16X16:
3234 linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx];
3235 break;
3236 case BLOCK_8X8:
3237 linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx];
3238 break;
3239 default: assert(0 && "Unexpected block size."); return 0;
3240 }
3241 if (!linear_weights) return 0;
3242
3243 { // Generate feature values.
3244 #if CONFIG_VP9_HIGHBITDEPTH
3245 const int ac_q =
3246 vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
3247 #else
3248 const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
3249 #endif // CONFIG_VP9_HIGHBITDEPTH
3250 const int num_pels_log2 = num_pels_log2_lookup[bsize];
3251 int feature_index = 0;
3252 unsigned int var, sse;
3253 float rate_f, dist_f;
3254
3255 #if CONFIG_VP9_HIGHBITDEPTH
3256 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3257 var =
3258 vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
3259 } else {
3260 var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
3261 vp9_64_zeros, 0, &sse);
3262 }
3263 #else
3264 var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
3265 vp9_64_zeros, 0, &sse);
3266 #endif
3267 var = var >> num_pels_log2;
3268
3269 vpx_clear_system_state();
3270
3271 rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
3272 dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
3273 rate_f =
3274 ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
3275 rate_f;
3276
3277 features[feature_index++] = rate_f;
3278 features[feature_index++] = dist_f;
3279 features[feature_index++] = (float)var;
3280 features[feature_index++] = (float)ac_q;
3281 assert(feature_index == FEATURES);
3282 }
3283
3284 { // Calculate the output score.
3285 int i;
3286 linear_score = linear_weights[FEATURES];
3287 for (i = 0; i < FEATURES; ++i)
3288 linear_score += linear_weights[i] * features[i];
3289 }
3290
3291 return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
3292 }
3293 #undef FEATURES
3294
3295 #define FEATURES 17
3296 #define LABELS 4
3297 static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
3298 BLOCK_SIZE bsize,
3299 const PC_TREE *const pc_tree,
3300 int *allow_horz, int *allow_vert,
3301 int64_t ref_rd, int mi_row, int mi_col) {
3302 const NN_CONFIG *nn_config = NULL;
3303 float score[LABELS] = {
3304 0.0f,
3305 };
3306 int thresh = -1;
3307 int i;
3308
3309 if (ref_rd <= 0 || ref_rd > 1000000000) return;
3310
3311 switch (bsize) {
3312 case BLOCK_8X8: break;
3313 case BLOCK_16X16:
3314 nn_config = &vp9_rect_part_nnconfig_16;
3315 thresh = cpi->sf.ml_prune_rect_partition_threhold[1];
3316 break;
3317 case BLOCK_32X32:
3318 nn_config = &vp9_rect_part_nnconfig_32;
3319 thresh = cpi->sf.ml_prune_rect_partition_threhold[2];
3320 break;
3321 case BLOCK_64X64:
3322 nn_config = &vp9_rect_part_nnconfig_64;
3323 thresh = cpi->sf.ml_prune_rect_partition_threhold[3];
3324 break;
3325 default: assert(0 && "Unexpected block size."); return;
3326 }
3327 if (!nn_config || thresh < 0) return;
3328
3329 // Feature extraction and model score calculation.
3330 {
3331 const int64_t none_rdcost = pc_tree->none.rdcost;
3332 const VP9_COMMON *const cm = &cpi->common;
3333 #if CONFIG_VP9_HIGHBITDEPTH
3334 const int dc_q =
3335 vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
3336 #else
3337 const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
3338 #endif // CONFIG_VP9_HIGHBITDEPTH
3339 int feature_index = 0;
3340 unsigned int block_var = 0;
3341 unsigned int sub_block_var[4] = { 0 };
3342 float features[FEATURES];
3343
3344 features[feature_index++] =
3345 (float)(pc_tree->partitioning == PARTITION_NONE);
3346 features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
3347
3348 // Calculate source pixel variance.
3349 {
3350 struct buf_2d buf;
3351 const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
3352 const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
3353 const MACROBLOCKD *const xd = &x->e_mbd;
3354 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
3355
3356 (void)xd;
3357 #if CONFIG_VP9_HIGHBITDEPTH
3358 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3359 block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
3360 bsize, xd->bd);
3361 } else {
3362 block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3363 }
3364 #else
3365 block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3366 #endif // CONFIG_VP9_HIGHBITDEPTH
3367
3368 buf.stride = x->plane[0].src.stride;
3369 for (i = 0; i < 4; ++i) {
3370 const int x_idx = (i & 1) * bs / 2;
3371 const int y_idx = (i >> 1) * bs / 2;
3372 buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
3373 #if CONFIG_VP9_HIGHBITDEPTH
3374 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3375 sub_block_var[i] =
3376 vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
3377 } else {
3378 sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
3379 }
3380 #else
3381 sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
3382 #endif // CONFIG_VP9_HIGHBITDEPTH
3383 }
3384 }
3385
3386 features[feature_index++] = logf((float)block_var + 1.0f);
3387 features[feature_index++] = logf((float)ref_rd + 1.0f);
3388 features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000)
3389 ? (float)pc_tree->none.skippable
3390 : 0.0f;
3391
3392 for (i = 0; i < 4; ++i) {
3393 const int64_t this_rd = pc_tree->split[i]->none.rdcost;
3394 const int rd_valid = this_rd > 0 && this_rd < 1000000000;
3395 // Ratio between sub-block RD and whole block RD.
3396 features[feature_index++] =
3397 rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f;
3398 // Sub-block skippable.
3399 features[feature_index++] =
3400 rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f;
3401 }
3402
3403 {
3404 const float denom = (float)(block_var + 1);
3405 const float low_b = 0.1f;
3406 const float high_b = 10.0f;
3407 for (i = 0; i < 4; ++i) {
3408 // Ratio between the quarter sub-block variance and the
3409 // whole-block variance.
3410 float var_ratio = (float)(sub_block_var[i] + 1) / denom;
3411 if (var_ratio < low_b) var_ratio = low_b;
3412 if (var_ratio > high_b) var_ratio = high_b;
3413 features[feature_index++] = var_ratio;
3414 }
3415 }
3416 assert(feature_index == FEATURES);
3417 nn_predict(features, nn_config, score);
3418 }
3419
3420 // Make decisions based on the model score.
3421 {
3422 int max_score = -1000;
3423 int horz = 0, vert = 0;
3424 int int_score[LABELS];
3425 for (i = 0; i < LABELS; ++i) {
3426 int_score[i] = (int)(100 * score[i]);
3427 max_score = VPXMAX(int_score[i], max_score);
3428 }
3429 thresh = max_score - thresh;
3430 for (i = 0; i < LABELS; ++i) {
3431 if (int_score[i] >= thresh) {
3432 if ((i >> 0) & 1) horz = 1;
3433 if ((i >> 1) & 1) vert = 1;
3434 }
3435 }
3436 *allow_horz = *allow_horz && horz;
3437 *allow_vert = *allow_vert && vert;
3438 }
3439 }
3440 #undef FEATURES
3441 #undef LABELS
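// Worked example of the label decoding above: bit 0 of the label index
// enables HORZ and bit 1 enables VERT. With int_score = {50, 90, 40, 70}
// and a threshold offset of 30, the cutoff is 90 - 30 = 60, so labels 1
// (horz) and 3 (horz + vert) pass, and both *allow_horz and *allow_vert
// survive the pruning.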
3442
3443 // Use a neural net model to prune partition-none and partition-split search.
3444 // The model uses prediction residue variance and quantization step size as
3445 // input features.
3446 #define FEATURES 6
3447 static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
3448 BLOCK_SIZE bsize, int mi_row,
3449 int mi_col, int *none, int *split) {
3450 VP9_COMMON *const cm = &cpi->common;
3451 MACROBLOCKD *xd = &x->e_mbd;
3452 MODE_INFO *mi = xd->mi[0];
3453 const NN_CONFIG *nn_config = NULL;
3454 #if CONFIG_VP9_HIGHBITDEPTH
3455 DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
3456 uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3457 ? (CONVERT_TO_BYTEPTR(pred_buffer))
3458 : pred_buffer;
3459 #else
3460 DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]);
3461 uint8_t *const pred_buf = pred_buffer;
3462 #endif // CONFIG_VP9_HIGHBITDEPTH
3463 const int speed = cpi->oxcf.speed;
3464 int i;
3465 float thresh = 0.0f;
3466
3467 switch (bsize) {
3468 case BLOCK_64X64:
3469 nn_config = &vp9_var_rd_part_nnconfig_64;
3470 thresh = speed > 0 ? 3.5f : 3.0f;
3471 break;
3472 case BLOCK_32X32:
3473 nn_config = &vp9_var_rd_part_nnconfig_32;
3474 thresh = speed > 0 ? 3.5f : 3.0f;
3475 break;
3476 case BLOCK_16X16:
3477 nn_config = &vp9_var_rd_part_nnconfig_16;
3478 thresh = speed > 0 ? 3.5f : 4.0f;
3479 break;
3480 case BLOCK_8X8:
3481 nn_config = &vp9_var_rd_part_nnconfig_8;
3482 if (cm->width >= 720 && cm->height >= 720)
3483 thresh = speed > 0 ? 2.5f : 2.0f;
3484 else
3485 thresh = speed > 0 ? 3.5f : 2.0f;
3486 break;
3487 default: assert(0 && "Unexpected block size."); return;
3488 }
3489
3490 if (!nn_config) return;
3491
3492 mi->ref_frame[1] = NONE;
3493 mi->sb_type = bsize;
3494 // Do a simple single motion search to find a prediction for the current
// block.
3495 // The variance of the residue will be used as input features.
3496 {
3497 const MV_REFERENCE_FRAME ref =
3498 cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
3499 YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
3500 MV ref_mv = { 0, 0 };
3501 MV ref_mv_full = { 0, 0 };
3502 const int step_param = 1;
3503 const MvLimits tmp_mv_limits = x->mv_limits;
3504 const SEARCH_METHODS search_method = NSTEP;
3505 const int sadpb = x->sadperbit16;
3506 MV best_mv = { 0, 0 };
3507 int cost_list[5];
3508
3509 assert(yv12 != NULL);
3510 if (!yv12) return;
3511 vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
3512 &cm->frame_refs[ref - 1].sf);
3513 mi->ref_frame[0] = ref;
3514 vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
3515 vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
3516 search_method, sadpb, cond_cost_list(cpi, cost_list),
3517 &ref_mv, &best_mv, 0, 0);
3518 best_mv.row *= 8;
3519 best_mv.col *= 8;
3520 x->mv_limits = tmp_mv_limits;
3521 mi->mv[0].as_mv = best_mv;
3522
3523 set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
3524 xd->plane[0].dst.buf = pred_buf;
3525 xd->plane[0].dst.stride = 64;
3526 vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
3527 }
3528
3529 vpx_clear_system_state();
3530
3531 {
3532 float features[FEATURES] = { 0.0f };
3533 #if CONFIG_VP9_HIGHBITDEPTH
3534 const int dc_q =
3535 vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8);
3536 #else
3537 const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
3538 #endif // CONFIG_VP9_HIGHBITDEPTH
3539 int feature_idx = 0;
3540 float score;
3541
3542 // Generate model input features.
3543 features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
3544 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
3545 // Get the variance of the residue as input features.
3546 {
3547 const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
3548 const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
3549 const uint8_t *pred = pred_buf;
3550 const uint8_t *src = x->plane[0].src.buf;
3551 const int src_stride = x->plane[0].src.stride;
3552 const int pred_stride = 64;
3553 unsigned int sse;
3554 // Variance of whole block.
3555 const unsigned int var =
3556 cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
3557 const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
3558
3559 features[feature_idx++] = logf((float)var + 1.0f);
3560 for (i = 0; i < 4; ++i) {
3561 const int x_idx = (i & 1) * bs / 2;
3562 const int y_idx = (i >> 1) * bs / 2;
3563 const int src_offset = y_idx * src_stride + x_idx;
3564 const int pred_offset = y_idx * pred_stride + x_idx;
3565 // Variance of quarter block.
3566 const unsigned int sub_var =
3567 cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
3568 pred + pred_offset, pred_stride, &sse);
3569 const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
3570 features[feature_idx++] = var_ratio;
3571 }
3572 }
3573 assert(feature_idx == FEATURES);
3574
3575 // Feed the features into the model to get the confidence score.
3576 nn_predict(features, nn_config, &score);
3577
3578 // A higher score means the model is more confident that the split
3579 // partition is better than the non-split partition. So if the score is
3580 // high enough, we skip the non-split (PARTITION_NONE) search; if the
3581 // score is low enough, we skip the split partition search.
3582 if (score > thresh) *none = 0;
3583 if (score < -thresh) *split = 0;
3584 }
3585 }
3586 #undef FEATURES
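// Worked example for ml_predict_var_rd_paritioning() above: with
// BLOCK_16X16 at speed 0 the threshold is 4.0, so a model score of 4.2
// would clear +thresh and disable the PARTITION_NONE search, while a score
// of -4.2 would fall below -thresh and disable the split search; anything
// in between keeps both candidates.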
3588
3589 static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
3590 int mi_col, int orig_rdmult) {
3591 const int gf_group_index = cpi->twopass.gf_group.index;
3592 TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
3593 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
3594 int tpl_stride = tpl_frame->stride;
3595 int64_t intra_cost = 0;
3596 int64_t mc_dep_cost = 0;
3597 int mi_wide = num_8x8_blocks_wide_lookup[bsize];
3598 int mi_high = num_8x8_blocks_high_lookup[bsize];
3599 int row, col;
3600
3601 int dr = 0;
3602 int count = 0;
3603 double r0, rk, beta;
3604
3605 if (tpl_frame->is_valid == 0) return orig_rdmult;
3606
3607 if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;
3608
3609 if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;
3610
3611 for (row = mi_row; row < mi_row + mi_high; ++row) {
3612 for (col = mi_col; col < mi_col + mi_wide; ++col) {
3613 TplDepStats *this_stats;
3614 // Range-check before forming the pointer into the stats array.
3615 if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
this_stats = &tpl_stats[row * tpl_stride + col];
3616
3617 intra_cost += this_stats->intra_cost;
3618 mc_dep_cost += this_stats->mc_dep_cost;
3619
3620 ++count;
3621 }
3622 }
3623
3624 vpx_clear_system_state();
3625
3626 r0 = cpi->rd.r0;
3627 rk = (double)intra_cost / mc_dep_cost;
3628 beta = r0 / rk;
3629 dr = vp9_get_adaptive_rdmult(cpi, beta);
3630
3631 dr = VPXMIN(dr, orig_rdmult * 3 / 2);
3632 dr = VPXMAX(dr, orig_rdmult * 1 / 2);
3633
3634 dr = VPXMAX(1, dr);
3635
3636 return dr;
3637 }
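// Worked example (illustrative): if the frame-level ratio cpi->rd.r0 is
// 0.5 and this block's local ratio rk = intra_cost / mc_dep_cost is 0.25,
// then beta = r0 / rk = 2.0 -- the block's distortion propagates strongly
// into future frames -- so vp9_get_adaptive_rdmult() is expected to return
// a smaller rdmult (spending more bits here), which is then clamped to
// [orig_rdmult / 2, orig_rdmult * 3 / 2].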
3638
3639 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
3640 // unlikely to be selected depending on previous rate-distortion optimization
3641 // results, for encoding speed-up.
3642 static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
3643 TileDataEnc *tile_data, TOKENEXTRA **tp,
3644 int mi_row, int mi_col, BLOCK_SIZE bsize,
3645 RD_COST *rd_cost, int64_t best_rd,
3646 PC_TREE *pc_tree) {
3647 VP9_COMMON *const cm = &cpi->common;
3648 TileInfo *const tile_info = &tile_data->tile_info;
3649 MACROBLOCK *const x = &td->mb;
3650 MACROBLOCKD *const xd = &x->e_mbd;
3651 const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
3652 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
3653 PARTITION_CONTEXT sl[8], sa[8];
3654 TOKENEXTRA *tp_orig = *tp;
3655 PICK_MODE_CONTEXT *const ctx = &pc_tree->none;
3656 int i;
3657 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3658 BLOCK_SIZE subsize;
3659 RD_COST this_rdc, sum_rdc, best_rdc;
3660 int do_split = bsize >= BLOCK_8X8;
3661 int do_rect = 1;
3662 INTERP_FILTER pred_interp_filter;
3663
3664 // Override skipping rectangular partition operations for edge blocks
3665 const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
3666 const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
3667 const int xss = x->e_mbd.plane[1].subsampling_x;
3668 const int yss = x->e_mbd.plane[1].subsampling_y;
3669
3670 BLOCK_SIZE min_size = x->min_partition_size;
3671 BLOCK_SIZE max_size = x->max_partition_size;
3672
3673 #if CONFIG_FP_MB_STATS
3674 unsigned int src_diff_var = UINT_MAX;
3675 int none_complexity = 0;
3676 #endif
3677
3678 int partition_none_allowed = !force_horz_split && !force_vert_split;
3679 int partition_horz_allowed =
3680 !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
3681 int partition_vert_allowed =
3682 !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
3683
3684 int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
3685 int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
3686 int must_split = 0;
3687 int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
3688 ? x->cb_rdmult
3689 : cpi->rd.RDMULT;
3690 // Ref frames picked in the [i_th] quarter subblock during square partition
3691 // RD search. It may be used to prune ref frame selection of rect partitions.
3692 uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
3693
3694 (void)*tp_orig;
3695
3696 assert(num_8x8_blocks_wide_lookup[bsize] ==
3697 num_8x8_blocks_high_lookup[bsize]);
3698
3699 dist_breakout_thr >>=
3700 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
3701
3702 rate_breakout_thr *= num_pels_log2_lookup[bsize];
3703
3704 vp9_rd_cost_init(&this_rdc);
3705 vp9_rd_cost_init(&sum_rdc);
3706 vp9_rd_cost_reset(&best_rdc);
3707 best_rdc.rdcost = best_rd;
3708
3709 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3710
3711 if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
3712 cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
3713 x->mb_energy = vp9_block_energy(cpi, x, bsize);
3714
3715 if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
3716 int cb_partition_search_ctrl =
3717 ((pc_tree->index == 0 || pc_tree->index == 3) +
3718 get_chessboard_index(cm->current_video_frame)) &
3719 0x1;
3720
3721 if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
3722 set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
3723 }
3724
3725 // Get sub block energy range
3726 if (bsize >= BLOCK_16X16) {
3727 int min_energy, max_energy;
3728 vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
3729 &max_energy);
3730 must_split = (min_energy < -3) && (max_energy - min_energy > 2);
3731 }
3732
3733 // Determine which partition types to search according to the speed
3734 // features. The thresholds set here must be square block sizes.
3735 if (cpi->sf.auto_min_max_partition_size) {
3736 partition_none_allowed &= (bsize <= max_size);
3737 partition_horz_allowed &=
3738 ((bsize <= max_size && bsize > min_size) || force_horz_split);
3739 partition_vert_allowed &=
3740 ((bsize <= max_size && bsize > min_size) || force_vert_split);
3741 do_split &= bsize > min_size;
3742 }
3743
3744 if (cpi->sf.use_square_partition_only &&
3745 (bsize > cpi->sf.use_square_only_thresh_high ||
3746 bsize < cpi->sf.use_square_only_thresh_low)) {
3747 if (cpi->use_svc) {
3748 if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
3749 partition_horz_allowed &= force_horz_split;
3750 if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
3751 partition_vert_allowed &= force_vert_split;
3752 } else {
3753 partition_horz_allowed &= force_horz_split;
3754 partition_vert_allowed &= force_vert_split;
3755 }
3756 }
3757
3758 save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3759
3760 #if CONFIG_FP_MB_STATS
3761 if (cpi->use_fp_mb_stats) {
3762 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3763 src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
3764 mi_col, bsize);
3765 }
3766 #endif
3767
3768 #if CONFIG_FP_MB_STATS
3769 // Decide whether to split directly and skip searching PARTITION_NONE by
3770 // using the first pass block statistics.
3771 if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
3772 partition_none_allowed && src_diff_var > 4 &&
3773 cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
3774 int mb_row = mi_row >> 1;
3775 int mb_col = mi_col >> 1;
3776 int mb_row_end =
3777 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
3778 int mb_col_end =
3779 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
3780 int r, c;
3781
3782 // Compute a complexity measure: the inconsistency of the motion vectors
3783 // obtained from the first pass within the current block.
3784 for (r = mb_row; r < mb_row_end; r++) {
3785 for (c = mb_col; c < mb_col_end; c++) {
3786 const int mb_index = r * cm->mb_cols + c;
3787
3788 MOTION_DIRECTION this_mv;
3789 MOTION_DIRECTION right_mv;
3790 MOTION_DIRECTION bottom_mv;
3791
3792 this_mv =
3793 get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
3794
3795 // to its right
3796 if (c != mb_col_end - 1) {
3797 right_mv = get_motion_direction_fp(
3798 cpi->twopass.this_frame_mb_stats[mb_index + 1]);
3799 none_complexity += get_motion_inconsistency(this_mv, right_mv);
3800 }
3801
3802 // to its bottom
3803 if (r != mb_row_end - 1) {
3804 bottom_mv = get_motion_direction_fp(
3805 cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
3806 none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
3807 }
3808
3809 // do not count its left and top neighbors to avoid double counting
3810 }
3811 }
3812
3813 if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
3814 partition_none_allowed = 0;
3815 }
3816 }
3817 #endif
3818
3819 pc_tree->partitioning = PARTITION_NONE;
3820
3821 if (cpi->sf.ml_var_partition_pruning) {
3822 const int do_ml_var_partition_pruning =
3823 !frame_is_intra_only(cm) && partition_none_allowed && do_split &&
3824 mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
3825 mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
3826 if (do_ml_var_partition_pruning) {
3827 ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
3828 &partition_none_allowed, &do_split);
3829 }
3830 }
3831
3832 // PARTITION_NONE
3833 if (partition_none_allowed) {
3834 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
3835 best_rdc.rdcost);
3836 ctx->rdcost = this_rdc.rdcost;
3837 if (this_rdc.rate != INT_MAX) {
3838 if (cpi->sf.prune_ref_frame_for_rect_partitions) {
3839 const int ref1 = ctx->mic.ref_frame[0];
3840 const int ref2 = ctx->mic.ref_frame[1];
3841 for (i = 0; i < 4; ++i) {
3842 ref_frames_used[i] |= (1 << ref1);
3843 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
3844 }
3845 }
3846 if (bsize >= BLOCK_8X8) {
3847 this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
3848 cpi->partition_cost[pl][PARTITION_NONE], 0);
3849 this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
3850 }
3851
3852 if (this_rdc.rdcost < best_rdc.rdcost) {
3853 MODE_INFO *mi = xd->mi[0];
3854
3855 best_rdc = this_rdc;
3856 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
3857
3858 if (cpi->sf.ml_partition_search_early_termination) {
3859 // Currently, the machine-learning based partition search early
3860 // termination is only used while bsize is 16x16, 32x32 or 64x64,
3861 // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
3862 if (!x->e_mbd.lossless &&
3863 !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
3864 ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
3865 if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) {
3866 do_split = 0;
3867 do_rect = 0;
3868 }
3869 }
3870 }
3871
3872 if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
3873 const int use_ml_based_breakout =
3874 cpi->sf.use_ml_partition_search_breakout &&
3875 cm->base_qindex >= 100;
3876 if (use_ml_based_breakout) {
3877 if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
3878 do_split = 0;
3879 do_rect = 0;
3880 }
3881 } else {
3882 if (!cpi->sf.ml_partition_search_early_termination) {
3883 if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
3884 (best_rdc.dist < dist_breakout_thr &&
3885 best_rdc.rate < rate_breakout_thr)) {
3886 do_split = 0;
3887 do_rect = 0;
3888 }
3889 }
3890 }
3891 }
3892
3893 #if CONFIG_FP_MB_STATS
3894 // Check whether every 16x16 first pass block has zero motion and a
3895 // sufficiently small first pass residue. If so, check the difference
3896 // variance between the current frame and the last frame; if that
3897 // variance is also small enough, stop further splitting in the RD
3898 // optimization.
3899 if (cpi->use_fp_mb_stats && do_split != 0 &&
3900 cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
3901 int mb_row = mi_row >> 1;
3902 int mb_col = mi_col >> 1;
3903 int mb_row_end =
3904 VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
3905 int mb_col_end =
3906 VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
3907 int r, c;
3908
3909 int skip = 1;
3910 for (r = mb_row; r < mb_row_end; r++) {
3911 for (c = mb_col; c < mb_col_end; c++) {
3912 const int mb_index = r * cm->mb_cols + c;
3913 if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
3914 FPMB_MOTION_ZERO_MASK) ||
3915 !(cpi->twopass.this_frame_mb_stats[mb_index] &
3916 FPMB_ERROR_SMALL_MASK)) {
3917 skip = 0;
3918 break;
3919 }
3920 }
3921 if (skip == 0) {
3922 break;
3923 }
3924 }
3925
3926 if (skip) {
3927 if (src_diff_var == UINT_MAX) {
3928 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3929 src_diff_var = get_sby_perpixel_diff_variance(
3930 cpi, &x->plane[0].src, mi_row, mi_col, bsize);
3931 }
3932 if (src_diff_var < 8) {
3933 do_split = 0;
3934 do_rect = 0;
3935 }
3936 }
3937 }
3938 #endif
3939 }
3940 }
3941 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3942 } else {
3943 vp9_zero(ctx->pred_mv);
3944 ctx->mic.interp_filter = EIGHTTAP;
3945 }
3946
3947 // Store the estimated motion vector.
3948 store_pred_mv(x, ctx);
3949
3950 // If the interp_filter is marked as SWITCHABLE_FILTERS, it was set for an
3951 // intra block and is used only for context purposes.
3952 if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
3953 pred_interp_filter = EIGHTTAP;
3954 } else {
3955 pred_interp_filter = ctx->mic.interp_filter;
3956 }
3957
3958 // PARTITION_SPLIT
3959 // TODO(jingning): use the motion vectors given by the above search as
3960 // the starting point of motion search in the following partition type check.
3961 pc_tree->split[0]->none.rdcost = 0;
3962 pc_tree->split[1]->none.rdcost = 0;
3963 pc_tree->split[2]->none.rdcost = 0;
3964 pc_tree->split[3]->none.rdcost = 0;
3965 if (do_split || must_split) {
3966 subsize = get_subsize(bsize, PARTITION_SPLIT);
3967 load_pred_mv(x, ctx);
3968 if (bsize == BLOCK_8X8) {
3969 i = 4;
3970 if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
3971 pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
3972 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3973 pc_tree->leaf_split[0], best_rdc.rdcost);
3974 if (sum_rdc.rate == INT_MAX) {
3975 sum_rdc.rdcost = INT64_MAX;
3976 } else {
3977 if (cpi->sf.prune_ref_frame_for_rect_partitions) {
3978 const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
3979 const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
3980 for (i = 0; i < 4; ++i) {
3981 ref_frames_used[i] |= (1 << ref1);
3982 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
3983 }
3984 }
3985 }
3986 } else {
3987 for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
3988 ++i) {
3989 const int x_idx = (i & 1) * mi_step;
3990 const int y_idx = (i >> 1) * mi_step;
3991
3992 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3993 continue;
3994
3995 pc_tree->split[i]->index = i;
3996 if (cpi->sf.prune_ref_frame_for_rect_partitions)
3997 pc_tree->split[i]->none.rate = INT_MAX;
3998 rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3999 mi_col + x_idx, subsize, &this_rdc,
4000 // A must split test here increases the number of sub
4001 // partitions but hurts metrics results quite a bit,
4002 // so this extra test is commented out pending
4003 // further tests on whether it adds much in terms of
4004 // visual quality.
4005 // (must_split) ? best_rdc.rdcost
4006 // : best_rdc.rdcost - sum_rdc.rdcost,
4007 best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
4008
4009 if (this_rdc.rate == INT_MAX) {
4010 sum_rdc.rdcost = INT64_MAX;
4011 break;
4012 } else {
4013 if (cpi->sf.prune_ref_frame_for_rect_partitions &&
4014 pc_tree->split[i]->none.rate != INT_MAX) {
4015 const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
4016 const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
4017 ref_frames_used[i] |= (1 << ref1);
4018 if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
4019 }
4020 sum_rdc.rate += this_rdc.rate;
4021 sum_rdc.dist += this_rdc.dist;
4022 sum_rdc.rdcost += this_rdc.rdcost;
4023 }
4024 }
4025 }
4026
4027 if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
4028 sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
4029 cpi->partition_cost[pl][PARTITION_SPLIT], 0);
4030 sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
4031
4032 if ((sum_rdc.rdcost < best_rdc.rdcost) ||
4033 (must_split && (sum_rdc.dist < best_rdc.dist))) {
4034 best_rdc = sum_rdc;
4035 pc_tree->partitioning = PARTITION_SPLIT;
4036
4037 // Rate and distortion based partition search termination clause.
4038 if (!cpi->sf.ml_partition_search_early_termination &&
4039 !x->e_mbd.lossless &&
4040 ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
4041 (best_rdc.dist < dist_breakout_thr &&
4042 best_rdc.rate < rate_breakout_thr))) {
4043 do_rect = 0;
4044 }
4045 }
4046 } else {
4047 // Skip the rectangular partition test when a larger block size
4048 // gives a better rd cost.
4049 if (cpi->sf.less_rectangular_check &&
4050 (bsize > cpi->sf.use_square_only_thresh_high ||
4051 best_rdc.dist < dist_breakout_thr))
4052 do_rect &= !partition_none_allowed;
4053 }
4054 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
4055 }
4056
4057 pc_tree->horizontal[0].skip_ref_frame_mask = 0;
4058 pc_tree->horizontal[1].skip_ref_frame_mask = 0;
4059 pc_tree->vertical[0].skip_ref_frame_mask = 0;
4060 pc_tree->vertical[1].skip_ref_frame_mask = 0;
4061 if (cpi->sf.prune_ref_frame_for_rect_partitions) {
4062 uint8_t used_frames;
4063 used_frames = ref_frames_used[0] | ref_frames_used[1];
4064 if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
4065 used_frames = ref_frames_used[2] | ref_frames_used[3];
4066 if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
4067 used_frames = ref_frames_used[0] | ref_frames_used[2];
4068 if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
4069 used_frames = ref_frames_used[1] | ref_frames_used[3];
4070 if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
4071 }
4072
4073 {
4074 const int do_ml_rect_partition_pruning =
4075 !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
4076 (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
4077 if (do_ml_rect_partition_pruning) {
4078 ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
4079 &partition_vert_allowed, best_rdc.rdcost, mi_row,
4080 mi_col);
4081 }
4082 }
4083
4084 // PARTITION_HORZ
4085 if (partition_horz_allowed &&
4086 (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
4087 const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ];
4088 const int64_t part_mode_rdcost =
4089 RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
4090 subsize = get_subsize(bsize, PARTITION_HORZ);
4091 load_pred_mv(x, ctx);
4092 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
4093 partition_none_allowed)
4094 pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
4095 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
4096 &pc_tree->horizontal[0],
4097 best_rdc.rdcost - part_mode_rdcost);
4098 if (sum_rdc.rdcost < INT64_MAX) {
4099 sum_rdc.rdcost += part_mode_rdcost;
4100 sum_rdc.rate += part_mode_rate;
4101 }
4102
4103 if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
4104 bsize > BLOCK_8X8) {
4105 PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
4106 update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
4107 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
4108 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
4109 partition_none_allowed)
4110 pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
4111 rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
4112 subsize, &pc_tree->horizontal[1],
4113 best_rdc.rdcost - sum_rdc.rdcost);
4114 if (this_rdc.rate == INT_MAX) {
4115 sum_rdc.rdcost = INT64_MAX;
4116 } else {
4117 sum_rdc.rate += this_rdc.rate;
4118 sum_rdc.dist += this_rdc.dist;
4119 sum_rdc.rdcost += this_rdc.rdcost;
4120 }
4121 }
4122
4123 if (sum_rdc.rdcost < best_rdc.rdcost) {
4124 best_rdc = sum_rdc;
4125 pc_tree->partitioning = PARTITION_HORZ;
4126
4127 if (cpi->sf.less_rectangular_check &&
4128 bsize > cpi->sf.use_square_only_thresh_high)
4129 do_rect = 0;
4130 }
4131 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
4132 }
4133
4134 // PARTITION_VERT
4135 if (partition_vert_allowed &&
4136 (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
4137 const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT];
4138 const int64_t part_mode_rdcost =
4139 RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
4140 subsize = get_subsize(bsize, PARTITION_VERT);
4141 load_pred_mv(x, ctx);
4142 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
4143 partition_none_allowed)
4144 pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
4145 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
4146 &pc_tree->vertical[0], best_rdc.rdcost - part_mode_rdcost);
4147 if (sum_rdc.rdcost < INT64_MAX) {
4148 sum_rdc.rdcost += part_mode_rdcost;
4149 sum_rdc.rate += part_mode_rate;
4150 }
4151
4152 if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
4153 bsize > BLOCK_8X8) {
4154 update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
4155 encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
4156 &pc_tree->vertical[0]);
4157 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
4158 partition_none_allowed)
4159 pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
4160 rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
4161 subsize, &pc_tree->vertical[1],
4162 best_rdc.rdcost - sum_rdc.rdcost);
4163 if (this_rdc.rate == INT_MAX) {
4164 sum_rdc.rdcost = INT64_MAX;
4165 } else {
4166 sum_rdc.rate += this_rdc.rate;
4167 sum_rdc.dist += this_rdc.dist;
4168 sum_rdc.rdcost += this_rdc.rdcost;
4169 }
4170 }
4171
4172 if (sum_rdc.rdcost < best_rdc.rdcost) {
4173 best_rdc = sum_rdc;
4174 pc_tree->partitioning = PARTITION_VERT;
4175 }
4176 restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
4177 }
4178
4179 // TODO(jbb): This code was added so that we avoid a static analysis
4180 // warning related to the fact that best_rd isn't used after this
4181 // point. It should be refactored so that the duplicate checks occur
4182 // in some sub function and are thus used...
4183 (void)best_rd;
4184 *rd_cost = best_rdc;
4185
4186 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
4187 pc_tree->index != 3) {
4188 int output_enabled = (bsize == BLOCK_64X64);
4189 encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
4190 pc_tree);
4191 }
4192
4193 if (bsize == BLOCK_64X64) {
4194 assert(tp_orig < *tp);
4195 assert(best_rdc.rate < INT_MAX);
4196 assert(best_rdc.dist < INT64_MAX);
4197 } else {
4198 assert(tp_orig == *tp);
4199 }
4200 }
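// A note on the recursion above: a 64x64 SB is explored as
//   rd_pick_partition(..., BLOCK_64X64)
//     -> PARTITION_NONE / HORZ / VERT evaluated at 64x64
//     -> PARTITION_SPLIT: 4 x rd_pick_partition(..., BLOCK_32X32), each
//        receiving best_rdc.rdcost - sum_rdc.rdcost as its budget, so a
//        subtree terminates early once it can no longer beat the running
//        best at the parent level.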
4201
4202 static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
4203 TileDataEnc *tile_data, int mi_row,
4204 TOKENEXTRA **tp) {
4205 VP9_COMMON *const cm = &cpi->common;
4206 TileInfo *const tile_info = &tile_data->tile_info;
4207 MACROBLOCK *const x = &td->mb;
4208 MACROBLOCKD *const xd = &x->e_mbd;
4209 SPEED_FEATURES *const sf = &cpi->sf;
4210 const int mi_col_start = tile_info->mi_col_start;
4211 const int mi_col_end = tile_info->mi_col_end;
4212 int mi_col;
4213 const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
4214 const int num_sb_cols =
4215 get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
4216 int sb_col_in_tile;
4217
4218 // Initialize the left context for the new SB row
4219 memset(&xd->left_context, 0, sizeof(xd->left_context));
4220 memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
4221
4222 // Code each SB in the row
4223 for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
4224 mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
4225 const struct segmentation *const seg = &cm->seg;
4226 int dummy_rate;
4227 int64_t dummy_dist;
4228 RD_COST dummy_rdc;
4229 int i;
4230 int seg_skip = 0;
4231
4232 const int idx_str = cm->mi_stride * mi_row + mi_col;
4233 MODE_INFO **mi = cm->mi_grid_visible + idx_str;
4234
4235 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4236 sb_col_in_tile);
4237
4238 if (sf->adaptive_pred_interp_filter) {
4239 for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
4240
4241 for (i = 0; i < 64; ++i) {
4242 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4243 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4244 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4245 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4246 }
4247 }
4248
4249 for (i = 0; i < MAX_REF_FRAMES; ++i) {
4250 x->pred_mv[i].row = INT16_MAX;
4251 x->pred_mv[i].col = INT16_MAX;
4252 }
4253 td->pc_root->index = 0;
4254
4255 if (seg->enabled) {
4256 const uint8_t *const map =
4257 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
4258 int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
4259 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4260 }
4261
4262 x->source_variance = UINT_MAX;
4263 if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
4264 const BLOCK_SIZE bsize =
4265 seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
4266 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
4267 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4268 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
4269 &dummy_rate, &dummy_dist, 1, td->pc_root);
4270 } else if (cpi->partition_search_skippable_frame) {
4271 BLOCK_SIZE bsize;
4272 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
4273 bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4274 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4275 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
4276 &dummy_rate, &dummy_dist, 1, td->pc_root);
4277 } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
4278 cm->frame_type != KEY_FRAME) {
4279 choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
4280 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
4281 &dummy_rate, &dummy_dist, 1, td->pc_root);
4282 } else {
4283 int orig_rdmult = cpi->rd.RDMULT;
4284 x->cb_rdmult = orig_rdmult;
4285 if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
4286 int dr =
4287 get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
4288 x->cb_rdmult = dr;
4289 }
4290
4291 // If required, set upper and lower partition size limits.
4292 if (sf->auto_min_max_partition_size) {
4293 set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
4294 rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
4295 &x->min_partition_size, &x->max_partition_size);
4296 }
4297 td->pc_root->none.rdcost = 0;
4298 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
4299 &dummy_rdc, INT64_MAX, td->pc_root);
4300 }
4301 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4302 sb_col_in_tile, num_sb_cols);
4303 }
4304 }
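// Illustrative note on the row_mt_sync_read/write pair in the loop above:
// it implements a wavefront-style dependency for row multi-threading. A
// thread waits (read) until the row above has advanced far enough that the
// above context for the current SB column is final, codes the SB, and then
// publishes (write) its own progress for the row below.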
4305
4306 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
4307 MACROBLOCK *const x = &cpi->td.mb;
4308 VP9_COMMON *const cm = &cpi->common;
4309 MACROBLOCKD *const xd = &x->e_mbd;
4310 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
4311
4312 // Copy data over into macro block data structures.
4313 vp9_setup_src_planes(x, cpi->Source, 0, 0);
4314
4315 vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
4316
4317 // Note: this memset assumes above_context[0], [1] and [2]
4318 // are allocated as part of the same buffer.
4319 memset(xd->above_context[0], 0,
4320 sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
4321 memset(xd->above_seg_context, 0,
4322 sizeof(*xd->above_seg_context) * aligned_mi_cols);
4323 }
4324
4325 static int check_dual_ref_flags(VP9_COMP *cpi) {
4326 const int ref_flags = cpi->ref_frame_flags;
4327
4328 if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
4329 return 0;
4330 } else {
4331 return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
4332 !!(ref_flags & VP9_ALT_FLAG)) >= 2;
4333 }
4334 }
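// Example: with ref_flags == (VP9_LAST_FLAG | VP9_GOLD_FLAG) the sum of the
// !! terms above is 2, so dual-reference prediction is reported as
// available; a single set flag yields 0.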
4335
4336 static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
4337 int mi_row, mi_col;
4338 const int mis = cm->mi_stride;
4339 MODE_INFO **mi_ptr = cm->mi_grid_visible;
4340
4341 for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
4342 for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
4343 if (mi_ptr[mi_col]->tx_size > max_tx_size)
4344 mi_ptr[mi_col]->tx_size = max_tx_size;
4345 }
4346 }
4347 }
4348
4349 static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
4350 if (frame_is_intra_only(&cpi->common))
4351 return INTRA_FRAME;
4352 else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
4353 return ALTREF_FRAME;
4354 else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
4355 return GOLDEN_FRAME;
4356 else
4357 return LAST_FRAME;
4358 }
4359
4360 static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
4361 if (xd->lossless) return ONLY_4X4;
4362 if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
4363 return ALLOW_16X16;
4364 if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
4365 return ALLOW_32X32;
4366 else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
4367 cpi->sf.tx_size_search_method == USE_TX_8X8)
4368 return TX_MODE_SELECT;
4369 else
4370 return cpi->common.tx_mode;
4371 }
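// Decision summary for select_tx_mode() above:
//   lossless                          -> ONLY_4X4
//   key frame with nonrd pick mode    -> ALLOW_16X16
//   USE_LARGESTALL                    -> ALLOW_32X32
//   USE_FULL_RD or USE_TX_8X8         -> TX_MODE_SELECT
//   otherwise                         -> keep cpi->common.tx_mode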
4372
4373 static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
4374 RD_COST *rd_cost, BLOCK_SIZE bsize,
4375 PICK_MODE_CONTEXT *ctx) {
4376 if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16)
4377 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
4378 else
4379 vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
4380 }
4381
4382 static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x,
4383 RD_COST *rd_cost, BLOCK_SIZE bsize,
4384 PICK_MODE_CONTEXT *ctx,
4385 TileDataEnc *tile_data, int mi_row,
4386 int mi_col) {
4387 if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
4388 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
4389 } else {
4390 if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
4391 vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
4392 else if (bsize >= BLOCK_8X8)
4393 vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
4394 ctx);
4395 else
4396 vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
4397 }
4398 }
4399
hybrid_search_scene_change(VP9_COMP * cpi,MACROBLOCK * const x,RD_COST * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,TileDataEnc * tile_data,int mi_row,int mi_col)4400 static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x,
4401 RD_COST *rd_cost, BLOCK_SIZE bsize,
4402 PICK_MODE_CONTEXT *ctx,
4403 TileDataEnc *tile_data, int mi_row,
4404 int mi_col) {
4405 if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
4406 vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
4407 } else {
4408 vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
4409 }
4410 }
4411
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8);  // processing unit block size
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int plane;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);

  mi = xd->mi[0];
  mi->sb_type = bsize;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
    if (cyclic_refresh_segment_id_boosted(mi->segment_id))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);

  if (frame_is_intra_only(cm))
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
    hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
                                mi_col);
  else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
    set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
  else if (bsize >= BLOCK_8X8) {
    if (cpi->rc.hybrid_intra_scene_change)
      hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
                                 mi_col);
    else
      vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
                          ctx);
  } else {
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
  }

  duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}

static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  MACROBLOCKD *xd = &x->e_mbd;
  int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  PARTITION_TYPE partition = pc_tree->partitioning;
  BLOCK_SIZE subsize = get_subsize(bsize, partition);

  assert(bsize >= BLOCK_8X8);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  switch (partition) {
    case PARTITION_NONE:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->none.mic;
      *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
      break;
    case PARTITION_VERT:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->vertical[0].mic;
      *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);

      if (mi_col + hbs < cm->mi_cols) {
        set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
        *(xd->mi[0]) = pc_tree->vertical[1].mic;
        *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
      }
      break;
    case PARTITION_HORZ:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
      *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
      if (mi_row + hbs < cm->mi_rows) {
        set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
        *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
      }
      break;
    case PARTITION_SPLIT: {
      fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
      fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
                        pc_tree->split[1]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
                        pc_tree->split[2]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
                        pc_tree->split[3]);
      break;
    }
    default: break;
  }
}

// Reset the prediction pixel ready flag recursively.
static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
  pc_tree->none.pred_pixel_ready = 0;
  pc_tree->horizontal[0].pred_pixel_ready = 0;
  pc_tree->horizontal[1].pred_pixel_ready = 0;
  pc_tree->vertical[0].pred_pixel_ready = 0;
  pc_tree->vertical[1].pred_pixel_ready = 0;

  if (bsize > BLOCK_8X8) {
    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
    int i;
    for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
  }
}

#if CONFIG_ML_VAR_PARTITION
#define FEATURES 6
#define LABELS 2
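// Use a small neural net to decide whether to split the current block or
// keep it whole. The FEATURES-dimensional input is: the log of the squared,
// normalized DC quantizer step; the log variance of the whole block against
// its estimated prediction (x->est_pred); and the four quarter-block
// variances expressed as ratios to the whole-block variance.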
static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
                                      BLOCK_SIZE bsize, int mi_row,
                                      int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const NN_CONFIG *nn_config = NULL;

  switch (bsize) {
    case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break;
    case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
    case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
    case BLOCK_8X8: break;
    default: assert(0 && "Unexpected block size."); return -1;
  }

  if (!nn_config) return -1;

  vpx_clear_system_state();

  {
    const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
    float features[FEATURES] = { 0.0f };
    const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
    int feature_idx = 0;
    float score[LABELS];

    features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
    vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
    {
      const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
      const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
      const int sb_offset_row = 8 * (mi_row & 7);
      const int sb_offset_col = 8 * (mi_col & 7);
      const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
      const uint8_t *src = x->plane[0].src.buf;
      const int src_stride = x->plane[0].src.stride;
      const int pred_stride = 64;
      unsigned int sse;
      int i;
      // Variance of whole block.
      const unsigned int var =
          cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
      const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);

      features[feature_idx++] = logf((float)var + 1.0f);
      for (i = 0; i < 4; ++i) {
        const int x_idx = (i & 1) * bs / 2;
        const int y_idx = (i >> 1) * bs / 2;
        const int src_offset = y_idx * src_stride + x_idx;
        const int pred_offset = y_idx * pred_stride + x_idx;
        // Variance of quarter block.
        const unsigned int sub_var =
            cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
                                    pred + pred_offset, pred_stride, &sse);
        const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
        features[feature_idx++] = var_ratio;
      }
    }

    assert(feature_idx == FEATURES);
    nn_predict(features, nn_config, score);
    if (score[0] > thresh) return PARTITION_SPLIT;
    if (score[0] < -thresh) return PARTITION_NONE;
    return -1;
  }
}
#undef FEATURES
#undef LABELS
#endif  // CONFIG_ML_VAR_PARTITION

static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, TOKENEXTRA **tp,
                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
                                 RD_COST *rd_cost, int do_recon,
                                 int64_t best_rd, PC_TREE *pc_tree) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  BLOCK_SIZE subsize = bsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
#if CONFIG_ML_VAR_PARTITION
  const int use_ml_based_partitioning =
      sf->partition_search_type == ML_BASED_PARTITION;
#endif  // CONFIG_ML_VAR_PARTITION

  (void)*tp_orig;

  // Avoid checking for rectangular partitions for speed >= 6.
  if (cpi->oxcf.speed >= 6) do_rect = 0;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (sf->auto_min_max_partition_size) {
    partition_none_allowed &=
        (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
    partition_horz_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_horz_split);
    partition_vert_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_vert_split);
    do_split &= bsize > x->min_partition_size;
  }
  if (sf->use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

#if CONFIG_ML_VAR_PARTITION
  if (use_ml_based_partitioning) {
    if (partition_none_allowed || do_split) do_rect = 0;
    if (partition_none_allowed && do_split) {
      const int ml_predicted_partition =
          ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
      if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
      if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
    }
  }
#endif  // CONFIG_ML_VAR_PARTITION

  if (!partition_none_allowed && !do_split) do_rect = 1;

  ctx->pred_pixel_ready =
      !(partition_vert_allowed || partition_horz_allowed || do_split);

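  // Each candidate partitioning below is scored with RDCOST, which folds
  // rate and distortion into one scalar (roughly rate scaled by rdmult plus
  // distortion scaled by rddiv); the candidate with the lowest such cost
  // wins.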
  // PARTITION_NONE
  if (partition_none_allowed) {
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
                        ctx);
    ctx->mic = *xd->mi[0];
    ctx->mbmi_ext = *x->mbmi_ext;
    ctx->skip_txfm[0] = x->skip_txfm[0];
    ctx->skip = x->skip;

    if (this_rdc.rate != INT_MAX) {
      const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
      this_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      if (this_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

#if CONFIG_ML_VAR_PARTITION
        if (!use_ml_based_partitioning)
#endif  // CONFIG_ML_VAR_PARTITION
        {
          int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
          int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
          dist_breakout_thr >>=
              8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
          rate_breakout_thr *= num_pels_log2_lookup[bsize];
          if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
              this_rdc.dist < dist_breakout_thr) {
            do_split = 0;
            do_rect = 0;
          }
        }
      }
    }
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  if (do_split) {
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;
      load_pred_mv(x, ctx);
      nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                           mi_col + x_idx, subsize, &this_rdc, 0,
                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_SPLIT;
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    load_pred_mv(x, ctx);
    pc_tree->horizontal[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->horizontal[0]);

    pc_tree->horizontal[0].mic = *xd->mi[0];
    pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->horizontal[0].skip = x->skip;

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
      load_pred_mv(x, ctx);
      pc_tree->horizontal[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
                          subsize, &pc_tree->horizontal[1]);

      pc_tree->horizontal[1].mic = *xd->mi[0];
      pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_HORZ;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);
    load_pred_mv(x, ctx);
    pc_tree->vertical[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->vertical[0]);
    pc_tree->vertical[0].mic = *xd->mi[0];
    pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->vertical[0].skip = x->skip;

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
      load_pred_mv(x, ctx);
      pc_tree->vertical[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
                          subsize, &pc_tree->vertical[1]);
      pc_tree->vertical[1].mic = *xd->mi[0];
      pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_VERT;
    } else {
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  *rd_cost = best_rdc;

  if (best_rdc.rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  // update mode info array
  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
                 pc_tree);
  }

  if (bsize == BLOCK_64X64 && do_recon) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}

static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, MODE_INFO **mi,
                                   TOKENEXTRA **tp, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize, int output_enabled,
                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  RD_COST this_rdc;
  BLOCK_SIZE subsize_ref =
      (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16;

  vp9_rd_cost_reset(&this_rdc);
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_16X16;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
             subsize >= subsize_ref) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
    x->max_partition_size = BLOCK_16X16;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        pc_tree->none.pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->none);
        pc_tree->none.mic = *xd->mi[0];
        pc_tree->none.mbmi_ext = *x->mbmi_ext;
        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
        pc_tree->none.skip = x->skip;
        break;
      case PARTITION_VERT:
        pc_tree->vertical[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->vertical[0]);
        pc_tree->vertical[0].mic = *xd->mi[0];
        pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[0].skip = x->skip;
        if (mi_col + hbs < cm->mi_cols) {
          pc_tree->vertical[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                              &this_rdc, subsize, &pc_tree->vertical[1]);
          pc_tree->vertical[1].mic = *xd->mi[0];
          pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->vertical[1].skip = x->skip;
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_HORZ:
        pc_tree->horizontal[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->horizontal[0]);
        pc_tree->horizontal[0].mic = *xd->mi[0];
        pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[0].skip = x->skip;
        if (mi_row + hbs < cm->mi_rows) {
          pc_tree->horizontal[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                              &this_rdc, subsize, &pc_tree->horizontal[1]);
          pc_tree->horizontal[1].mic = *xd->mi[0];
          pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->horizontal[1].skip = x->skip;
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      default:
        assert(partition == PARTITION_SPLIT);
        subsize = get_subsize(bsize, PARTITION_SPLIT);
        nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                               subsize, output_enabled, rd_cost,
                               pc_tree->split[0]);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                               mi_col + hbs, subsize, output_enabled, &this_rdc,
                               pc_tree->split[1]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                               mi_row + hbs, mi_col, subsize, output_enabled,
                               &this_rdc, pc_tree->split[2]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                               mi_row + hbs, mi_col + hbs, subsize,
                               output_enabled, &this_rdc, pc_tree->split[3]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        break;
    }
  }

  if (bsize == BLOCK_64X64 && output_enabled)
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
}

static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, MODE_INFO **mi,
                                TOKENEXTRA **tp, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                RD_COST *dummy_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (output_enabled && bsize != BLOCK_4X4) {
    int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    td->counts->partition[ctx][partition]++;
  }

  switch (partition) {
    case PARTITION_NONE:
      pc_tree->none.pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->none);
      pc_tree->none.mic = *xd->mi[0];
      pc_tree->none.mbmi_ext = *x->mbmi_ext;
      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
      pc_tree->none.skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->none);
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->vertical[0]);
      pc_tree->vertical[0].mic = *xd->mi[0];
      pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->vertical[0]);
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        pc_tree->vertical[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
                            subsize, &pc_tree->vertical[1]);
        pc_tree->vertical[1].mic = *xd->mi[0];
        pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
                    output_enabled, subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->horizontal[0]);
      pc_tree->horizontal[0].mic = *xd->mi[0];
      pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[0]);

      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        pc_tree->horizontal[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
                            subsize, &pc_tree->horizontal[1]);
        pc_tree->horizontal[1].mic = *xd->mi[0];
        pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
                    output_enabled, subsize, &pc_tree->horizontal[1]);
      }
      break;
    default:
      assert(partition == PARTITION_SPLIT);
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                            subsize, pc_tree->leaf_split[0]);
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                    subsize, pc_tree->leaf_split[0]);
      } else {
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
                            output_enabled, dummy_cost, pc_tree->split[0]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                            mi_col + hbs, subsize, output_enabled, dummy_cost,
                            pc_tree->split[1]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                            mi_row + hbs, mi_col, subsize, output_enabled,
                            dummy_cost, pc_tree->split[2]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                            mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[3]);
      }
      break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

#if CONFIG_ML_VAR_PARTITION
// Get a prediction (stored in x->est_pred) for the whole 64x64 superblock.
static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MACROBLOCKD *xd = &x->e_mbd;
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
    const YV12_BUFFER_CONFIG *yv12_g = NULL;
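    // Pick the largest block size that still lies inside the frame: the
    // +2/+1 offsets step through the consecutive BLOCK_32X32, BLOCK_32X64,
    // BLOCK_64X32, BLOCK_64X64 entries of the BLOCK_SIZE enum depending on
    // whether a full 64-pixel column and/or row is available.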
    const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
                             (mi_row + 4 < cm->mi_rows);
    int pixels_wide = 64, pixels_high = 64;
    unsigned int y_sad_g, y_sad_thr;
    unsigned int y_sad = UINT_MAX;

    assert(yv12 != NULL);

    if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
    if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
        cpi->svc.use_gf_temporal_ref_current_layer) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }

    // Only compute y_sad_g (sad for golden reference) for speed < 8.
    if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad_g = UINT_MAX;
    }

    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;

    {
      const MV dummy_mv = { 0, 0 };
      y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
                                            &dummy_mv);
      x->sb_use_mv_part = 1;
      x->sb_mvcol_part = mi->mv[0].as_mv.col;
      x->sb_mvrow_part = mi->mv[0].as_mv.row;
    }

    // Pick the reference frame for partitioning: bias toward LAST when
    // y_sad_g and y_sad are close and short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
      y_sad = y_sad_g;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
    }

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
  } else {
#if CONFIG_VP9_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }
}
#endif  // CONFIG_ML_VAR_PARTITION

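// Encode one 64x64-superblock row of a tile using the non-RD (RTC) path.
// Under row-based multi-threading the row_mt_sync_read hook stalls each
// superblock until the superblocks it depends on in the row above have been
// encoded, and row_mt_sync_write publishes this superblock's completion for
// the row below.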
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, int mi_row,
                                TOKENEXTRA **tp) {
  SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
    const struct segmentation *const seg = &cm->seg;
    RD_COST dummy_rdc;
    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
    PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
    BLOCK_SIZE bsize = BLOCK_64X64;
    int seg_skip = 0;
    int i;

    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (cpi->use_skin_detection) {
      vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col);
    }

    x->source_variance = UINT_MAX;
    for (i = 0; i < MAX_REF_FRAMES; ++i) {
      x->pred_mv[i].row = INT16_MAX;
      x->pred_mv[i].col = INT16_MAX;
    }
    vp9_rd_cost_init(&dummy_rdc);
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->sb_is_skin = 0;
    x->skip_low_source_sad = 0;
    x->lowvar_highsumdiff = 0;
    x->content_state_sb = 0;
    x->zero_temp_sad_source = 0;
    x->sb_use_mv_part = 0;
    x->sb_mvcol_part = 0;
    x->sb_mvrow_part = 0;
    x->sb_pickmode_part = 0;
    x->arf_frame_usage = 0;
    x->lastgolden_frame_usage = 0;

    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
      if (seg_skip) {
        partition_search_type = FIXED_PARTITION;
      }
    }

    if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
      int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
      int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2);
      if (sf->adapt_partition_source_sad &&
          (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref &&
           source_sad > sf->adapt_partition_thresh &&
           (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)))
        partition_search_type = REFERENCE_PARTITION;
    }

    // Set the partition type of the 64X64 block
    switch (partition_search_type) {
      case VAR_BASED_PARTITION:
        // TODO(jingning, marpan): The mode decision and encoding process
        // support both intra and inter sub8x8 block coding for RTC mode.
        // Tune the thresholds accordingly to use sub8x8 block coding for
        // coding performance improvement.
        choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
#if CONFIG_ML_VAR_PARTITION
      case ML_BASED_PARTITION:
        get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
        x->max_partition_size = BLOCK_64X64;
        x->min_partition_size = BLOCK_8X8;
        x->sb_pickmode_part = 1;
        nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                             td->pc_root);
        break;
#endif  // CONFIG_ML_VAR_PARTITION
      case SOURCE_VAR_BASED_PARTITION:
        set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case FIXED_PARTITION:
        if (!seg_skip) bsize = sf->always_this_block_size;
        set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      default:
        assert(partition_search_type == REFERENCE_PARTITION);
        x->sb_pickmode_part = 1;
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        // Use nonrd_pick_partition on scene-cut for VBR mode.
        // nonrd_pick_partition does not support 4x4 partition, so avoid it
        // on key frame for now.
        if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
             cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) &&
             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
          // Use lower max_partition_size for low resolutions.
          if (cm->width <= 352 && cm->height <= 288)
            x->max_partition_size = BLOCK_32X32;
          else
            x->max_partition_size = BLOCK_64X64;
          x->min_partition_size = BLOCK_8X8;
          nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                               BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                               td->pc_root);
        } else {
          choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
          // TODO(marpan): Seems like nonrd_select_partition does not support
          // 4x4 partition. Since 4x4 is used on key frame, use this switch
          // for now.
          if (frame_is_intra_only(cm))
            nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
          else
            nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                   BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        }

        break;
    }

    // Update ref_frame usage for inter frame if this group is ARF group.
    if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame &&
        !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group &&
        cpi->sf.use_altref_onepass) {
      int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      if (cpi->count_arf_frame_usage != NULL)
        cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage;
      if (cpi->count_lastgolden_frame_usage != NULL)
        cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage;
    }

    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}
// end RTC play code

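// Variance of a 16x16 difference block (256 samples): var = sse - sum^2/256,
// with the division done as a shift by 8. For example, if all 256
// source-vs-last-source differences are 2, then sum = 512 and sse = 1024,
// giving var = 1024 - ((512 * 512) >> 8) = 0, i.e. a pure DC shift has no
// variance.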
static INLINE uint32_t variance(const diff *const d) {
  return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8);
}

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE uint32_t variance_highbd(diff *const d) {
  const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8);
  return (var >= 0) ? (uint32_t)var : 0;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

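// Build a histogram of per-16x16 variances between the current and last
// source frames, then derive a variance threshold: when the count of
// high-variance blocks (the top bin) stays below `cutoff`, the threshold is
// set at the bin where the cumulative count first exceeds the cutoff and 0
// is returned (re-check every frame); otherwise the next check is deferred
// by sf->search_type_check_frequency frames.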
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const VP9_COMMON *const cm = &cpi->common;

  const uint8_t *src = cpi->Source->y_buffer;
  const uint8_t *last_src = cpi->Last_Source->y_buffer;
  const int src_stride = cpi->Source->y_stride;
  const int last_stride = cpi->Last_Source->y_stride;

  // Pick cutoff threshold
  const int cutoff = (VPXMIN(cm->width, cm->height) >= 720)
                         ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100)
                         : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
  diff *var16 = cpi->source_diff_var;

  int sum = 0;
  int i, j;

  memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));

  for (i = 0; i < cm->mb_rows; i++) {
    for (j = 0; j < cm->mb_cols; j++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        switch (cm->bit_depth) {
          case VPX_BITS_8:
            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &var16->sse, &var16->sum);
            var16->var = variance(var16);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
          default:
            assert(cm->bit_depth == VPX_BITS_12);
            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
        }
      } else {
        vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                        &var16->sum);
        var16->var = variance(var16);
      }
#else
      vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                      &var16->sum);
      var16->var = variance(var16);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      if (var16->var >= VAR_HIST_MAX_BG_VAR)
        hist[VAR_HIST_BINS - 1]++;
      else
        hist[var16->var / VAR_HIST_FACTOR]++;

      src += 16;
      last_src += 16;
      var16++;
    }

    src = src - cm->mb_cols * 16 + 16 * src_stride;
    last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
  }

  cpi->source_var_thresh = 0;

  if (hist[VAR_HIST_BINS - 1] < cutoff) {
    for (i = 0; i < VAR_HIST_BINS - 1; i++) {
      sum += hist[i];

      if (sum > cutoff) {
        cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
        return 0;
      }
    }
  }

  return sf->search_type_check_frequency;
}

static void source_var_based_partition_search_method(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;

  if (cm->frame_type == KEY_FRAME) {
    // For key frame, use SEARCH_PARTITION.
    sf->partition_search_type = SEARCH_PARTITION;
  } else if (cm->intra_only) {
    sf->partition_search_type = FIXED_PARTITION;
  } else {
    if (cm->last_width != cm->width || cm->last_height != cm->height) {
      if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);

      CHECK_MEM_ERROR(cm, cpi->source_diff_var,
                      vpx_calloc(cm->MBs, sizeof(diff)));
    }

    if (!cpi->frames_till_next_var_check)
      cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);

    if (cpi->frames_till_next_var_check > 0) {
      sf->partition_search_type = FIXED_PARTITION;
      cpi->frames_till_next_var_check--;
    }
  }
}

static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
  unsigned int intra_count = 0, inter_count = 0;
  int j;

  for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
    intra_count += td->counts->intra_inter[j][0];
    inter_count += td->counts->intra_inter[j][1];
  }

  return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME &&
         cm->show_frame;
}

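// Lay out per-tile encoder state. Token buffers for all tiles are carved out
// of one contiguous allocation: pre_tok/tile_tok advance by each tile's
// allocated_tokens() so that tile_tok[row][col] points at that tile's slice,
// and the tplist per-row token bookkeeping is partitioned the same way.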
void vp9_init_tile_data(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;
  TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
  TOKENLIST *tplist = cpi->tplist[0][0];
  int tile_tok = 0;
  int tplist_count = 0;

  if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
    if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
    CHECK_MEM_ERROR(
        cm, cpi->tile_data,
        vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
    cpi->allocated_tiles = tile_cols * tile_rows;

    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
        TileDataEnc *tile_data =
            &cpi->tile_data[tile_row * tile_cols + tile_col];
        int i, j;
        for (i = 0; i < BLOCK_SIZES; ++i) {
          for (j = 0; j < MAX_MODES; ++j) {
            tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
#if CONFIG_CONSISTENT_RECODE
            tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
#endif
            tile_data->mode_map[i][j] = j;
          }
        }
#if CONFIG_MULTITHREAD
        tile_data->row_base_thresh_freq_fact = NULL;
#endif
      }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *tile_info = &this_tile->tile_info;
      if (cpi->sf.adaptive_rd_thresh_row_mt &&
          this_tile->row_base_thresh_freq_fact == NULL)
        vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
      vp9_tile_init(tile_info, cm, tile_row, tile_col);

      cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
      pre_tok = cpi->tile_tok[tile_row][tile_col];
      tile_tok = allocated_tokens(*tile_info);

      cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
      tplist = cpi->tplist[tile_row][tile_col];
      tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
    }
  }
}

void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  TOKENEXTRA *tok = NULL;
  int tile_sb_row;
  int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;

  tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>
                MI_BLOCK_SIZE_LOG2;
  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
  cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok;

  if (cpi->sf.use_nonrd_pick_mode)
    encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
  else
    encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);

  cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok;
  cpi->tplist[tile_row][tile_col][tile_sb_row].count =
      (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop -
                     cpi->tplist[tile_row][tile_col][tile_sb_row].start);
  assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <=
         get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));

  (void)tile_mb_cols;
}

void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  const int mi_row_start = tile_info->mi_row_start;
  const int mi_row_end = tile_info->mi_row_end;
  int mi_row;

  for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
    vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}

static void encode_tiles(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;

  vp9_init_tile_data(cpi);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row)
    for (tile_col = 0; tile_col < tile_cols; ++tile_col)
      vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}

#if CONFIG_FP_MB_STATS
static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
                         cm->current_video_frame * cm->MBs * sizeof(uint8_t);

  if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;

  *this_frame_mb_stats = mb_stats_in;

  return 1;
}
#endif

static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int gf_group_index = cpi->twopass.gf_group.index;

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  vp9_zero(*td->counts);
  vp9_zero(cpi->td.rd_counts);

  xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth)
    x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
  else
    x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
  x->highbd_inv_txfm_add =
      xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
#else
  x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
#if CONFIG_CONSISTENT_RECODE
  x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
#endif
  if (xd->lossless) x->optimize = 0;
  x->sharpness = cpi->oxcf.sharpness;
  x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ);

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);
  cm->use_prev_frame_mvs =
      !cm->error_resilient_mode && cm->width == cm->last_width &&
      cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
  // Special case: set prev_mi to NULL when the previous mode info
  // context cannot be used.
  cm->prev_mi =
      cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;

  x->quant_fp = cpi->sf.use_quant_fp;
  vp9_zero(x->skip_txfm);
  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap is needed.
    int i;
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;

    for (i = 0; i < MAX_MB_PLANE; ++i) {
      p[i].coeff = ctx->coeff_pbuf[i][0];
      p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
      pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
      p[i].eobs = ctx->eobs_pbuf[i][0];
    }
    vp9_zero(x->zcoeff_blk);

    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
        !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
        !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE &&
             cpi->sf.enable_tpl_model) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;

    int tpl_stride = tpl_frame->stride;
    int64_t intra_cost_base = 0;
    int64_t mc_dep_cost_base = 0;
    int row, col;

    for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
      for (col = 0; col < cm->mi_cols; ++col) {
        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
        intra_cost_base += this_stats->intra_cost;
        mc_dep_cost_base += this_stats->mc_dep_cost;
      }
    }

    vpx_clear_system_state();

    if (tpl_frame->is_valid)
      cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
  }

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    if (!cpi->row_mt) {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
      // When row-based multi-threading is disabled, encode tiles in
      // parallel if allowed, with one thread handling one tile.
      if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
        vp9_encode_tiles_mt(cpi);
      else
        encode_tiles(cpi);
    } else {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
      vp9_encode_tiles_row_mt(cpi);
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  sf->skip_encode_frame =
      sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}

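// Choose a fixed interpolation filter for the frame when its accumulated rd
// score beats the SWITCHABLE option; EIGHTTAP_SMOOTH is only considered for
// non-alt-ref frames. If no fixed filter wins, fall back to per-block
// switchable filtering.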
static INTERP_FILTER get_interp_filter(
    const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
  if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
      threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
      threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP_SMOOTH;
  } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
             threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP_SHARP;
  } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
    return EIGHTTAP;
  } else {
    return SWITCHABLE;
  }
}

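// Average the per-segment SEG_LVL_ALT_Q deltas over every 8x8 mode-info
// unit in the frame, yielding the mean q offset that segment-based AQ
// applied this frame.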
static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
  struct segmentation *const seg = &cm->seg;

  int mi_row, mi_col;
  int sum_delta = 0;
  int map_index = 0;
  int qdelta_index;
  int segment_id;

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
    MODE_INFO **mi_8x8 = mi_8x8_ptr;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
      segment_id = mi_8x8[0]->segment_id;
      qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
      sum_delta += qdelta_index;
      map_index++;
    }
    mi_8x8_ptr += cm->mi_stride;
  }

  return sum_delta / (cm->mi_rows * cm->mi_cols);
}

#if CONFIG_CONSISTENT_RECODE
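// Restores the rd thresholds and per-tile mode pruning factors saved from
// the previous encode pass, so that a recode of the same frame starts from
// identical adaptive state, which is what makes recodes consistent.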
static void restore_encode_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;
  int i, j;
  RD_OPT *rd_opt = &cpi->rd;
  for (i = 0; i < MAX_REF_FRAMES; i++) {
    for (j = 0; j < REFERENCE_MODES; j++)
      rd_opt->prediction_type_threshes[i][j] =
          rd_opt->prediction_type_threshes_prev[i][j];

    for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
      rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
  }

  if (cpi->tile_data != NULL) {
    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
        TileDataEnc *tile_data =
            &cpi->tile_data[tile_row * tile_cols + tile_col];
        for (i = 0; i < BLOCK_SIZES; ++i) {
          for (j = 0; j < MAX_MODES; ++j) {
            tile_data->thresh_freq_fact[i][j] =
                tile_data->thresh_freq_fact_prev[i][j];
          }
        }
      }
  }

  cm->interp_filter = cpi->sf.default_interp_filter;
}
#endif

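// Top-level per-frame encode entry: selects the frame-level reference
// mode, interpolation filter and transform mode (when frame parameter
// updates are enabled), runs the main encode pass, then folds the
// resulting counts back into the running adaptation state.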
void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

#if CONFIG_CONSISTENT_RECODE
  restore_encode_params(cpi);
#endif

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    if (vp9_compound_reference_allowed(cm)) {
      cpi->allow_comp_inter_inter = 1;
      vp9_setup_compound_reference_mode(cm);
    } else {
      cpi->allow_comp_inter_inter = 0;
    }
  }

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It also does the same analysis for transform size selection.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    if (is_alt_ref || !cpi->allow_comp_inter_inter)
      cm->reference_mode = SINGLE_REFERENCE;
    else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
             mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
             check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
      cm->reference_mode = COMPOUND_REFERENCE;
    else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
      cm->reference_mode = SINGLE_REFERENCE;
    else
      cm->reference_mode = REFERENCE_MODE_SELECT;

    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

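    // Fold this frame's per-MB rd cost differences into the running
    // thresholds as an equally weighted moving average; e.g. an old
    // threshold of 100 and a per-MB diff of 60 yields (100 + 60) / 2 = 80.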
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

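    // If the hybrid search never actually used one of the two prediction
    // types, collapse REFERENCE_MODE_SELECT to the type that was used and
    // clear the now-meaningless comp_inter counts.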
    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }

      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

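    // Likewise, demote TX_MODE_SELECT to a fixed maximum transform size
    // when the counts show that the excluded sizes were never chosen,
    // saving per-block transform size signalling.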
    if (cm->tx_mode == TX_MODE_SELECT) {
      int count4x4 = 0;
      int count8x8_lp = 0, count8x8_8x8p = 0;
      int count16x16_16x16p = 0, count16x16_lp = 0;
      int count32x32 = 0;

      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        count4x4 += counts->tx.p32x32[i][TX_4X4];
        count4x4 += counts->tx.p16x16[i][TX_4X4];
        count4x4 += counts->tx.p8x8[i][TX_4X4];

        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];

        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
        count32x32 += counts->tx.p32x32[i][TX_32X32];
      }
      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
          count32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
                 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  } else {
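    // Fast path with frame parameter updates disabled (e.g. non-RD rtc
    // coding): default to single reference, enabling hybrid prediction
    // only for the compound non-rd pickmode case.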
    FRAME_COUNTS *counts = cpi->td.counts;
    cm->reference_mode = SINGLE_REFERENCE;
    if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode &&
        cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref &&
        cm->frame_type != KEY_FRAME)
      cm->reference_mode = REFERENCE_MODE_SELECT;

    encode_frame_internal(cpi);

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;
      int i;
      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }
      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }
  }

  // If segmented AQ is enabled, compute the average AQ weighting.
  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
      (cm->seg.update_map || cm->seg.update_data)) {
    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
  }
}

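// Accumulates intra mode counts for entropy coding: per-4x4 sub-block
// modes for partitions below 8x8, the size-grouped y mode otherwise, and
// the uv mode conditioned on the y mode.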
static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
  const PREDICTION_MODE y_mode = mi->mode;
  const PREDICTION_MODE uv_mode = mi->uv_mode;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (bsize < BLOCK_8X8) {
    int idx, idy;
    const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
    const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
    for (idy = 0; idy < 2; idy += num_4x4_h)
      for (idx = 0; idx < 2; idx += num_4x4_w)
        ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
  } else {
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  }

  ++counts->uv_mode[y_mode][uv_mode];
}

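// Tracks, per 8x8 mi unit, how many consecutive frames have had
// effectively zero motion against LAST_FRAME (both mv components under 8
// in 1/8-pel units, i.e. under one full pel), saturating at 255; larger
// motion resets the count. The map feeds, among others, the cyclic
// refresh logic.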
static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
                              int mi_row, int mi_col, BLOCK_SIZE bsize) {
  const VP9_COMMON *const cm = &cpi->common;
  MV mv = mi->mv[0].as_mv;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  const int block_index = mi_row * cm->mi_cols + mi_col;
  int x, y;
  for (y = 0; y < ymis; y++)
    for (x = 0; x < xmis; x++) {
      int map_offset = block_index + y * cm->mi_cols + x;
      if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) &&
          mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
        if (abs(mv.row) < 8 && abs(mv.col) < 8) {
          if (cpi->consec_zero_mv[map_offset] < 255)
            cpi->consec_zero_mv[map_offset]++;
        } else {
          cpi->consec_zero_mv[map_offset] = 0;
        }
      }
    }
}

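// Final encode of a superblock with its chosen modes: rebuilds the
// prediction, runs transform/quantization and tokenization, and, when
// output is enabled, updates transform size and zero-mv statistics.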
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
  x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                   cpi->sf.allow_skip_recode;

  if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);

  if (x->skip_encode) return;

  if (!is_inter_block(mi)) {
    int plane;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) &&
        need_top_left[mi->uv_mode])
      assert(0);
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    mi->skip = 1;
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
    if (output_enabled) sum_intra_stats(td->counts, mi);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  } else {
    int ref;
    const int is_compound = has_second_ref(mi);
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
      assert(cfg != NULL);
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           &xd->block_refs[ref]->sf);
    }
    if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
                                     VPXMAX(bsize, BLOCK_8X8));

    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
                                    VPXMAX(bsize, BLOCK_8X8));

    vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  }

  if (seg_skip) {
    assert(mi->skip);
  }

  if (output_enabled) {
    if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
        !(is_inter_block(mi) && mi->skip)) {
      ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
                      &td->counts->tx)[mi->tx_size];
    } else {
      // The new intra coding scheme requires no change of transform size
      if (is_inter_block(mi)) {
        mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                             max_txsize_lookup[bsize]);
      } else {
        mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
      }
    }

    ++td->counts->tx.tx_totals[mi->tx_size];
    ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
      vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
    if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
        (!cpi->use_svc ||
         (cpi->use_svc &&
          !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
          cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
      update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
  }
}
