• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <math.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 
17 #include "config/aom_config.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "config/av1_rtcd.h"
20 
21 #include "aom_dsp/aom_dsp_common.h"
22 #include "aom_dsp/binary_codes_writer.h"
23 #include "aom_ports/mem.h"
24 #include "aom_ports/aom_timer.h"
25 
26 #include "av1/common/reconinter.h"
27 #include "av1/common/blockd.h"
28 
29 #include "av1/encoder/encodeframe.h"
30 #include "av1/encoder/var_based_part.h"
31 #include "av1/encoder/reconinter_enc.h"
32 #include "av1/encoder/rdopt_utils.h"
33 
// Possible values for the force_split variable while evaluating variance based
// partitioning.
enum {
  // Consider every partition type.
  PART_EVAL_ALL = 0,
  // Restrict the evaluation to PARTITION_SPLIT.
  PART_EVAL_ONLY_SPLIT = 1,
  // Restrict the evaluation to PARTITION_NONE.
  PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);
44 
45 typedef struct {
46   VPVariance *part_variances;
47   VPartVar *split[4];
48 } variance_node;
49 
tree_to_node(void * data,BLOCK_SIZE bsize,variance_node * node)50 static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize,
51                                     variance_node *node) {
52   node->part_variances = NULL;
53   switch (bsize) {
54     case BLOCK_128X128: {
55       VP128x128 *vt = (VP128x128 *)data;
56       node->part_variances = &vt->part_variances;
57       for (int split_idx = 0; split_idx < 4; split_idx++)
58         node->split[split_idx] = &vt->split[split_idx].part_variances.none;
59       break;
60     }
61     case BLOCK_64X64: {
62       VP64x64 *vt = (VP64x64 *)data;
63       node->part_variances = &vt->part_variances;
64       for (int split_idx = 0; split_idx < 4; split_idx++)
65         node->split[split_idx] = &vt->split[split_idx].part_variances.none;
66       break;
67     }
68     case BLOCK_32X32: {
69       VP32x32 *vt = (VP32x32 *)data;
70       node->part_variances = &vt->part_variances;
71       for (int split_idx = 0; split_idx < 4; split_idx++)
72         node->split[split_idx] = &vt->split[split_idx].part_variances.none;
73       break;
74     }
75     case BLOCK_16X16: {
76       VP16x16 *vt = (VP16x16 *)data;
77       node->part_variances = &vt->part_variances;
78       for (int split_idx = 0; split_idx < 4; split_idx++)
79         node->split[split_idx] = &vt->split[split_idx].part_variances.none;
80       break;
81     }
82     case BLOCK_8X8: {
83       VP8x8 *vt = (VP8x8 *)data;
84       node->part_variances = &vt->part_variances;
85       for (int split_idx = 0; split_idx < 4; split_idx++)
86         node->split[split_idx] = &vt->split[split_idx].part_variances.none;
87       break;
88     }
89     default: {
90       VP4x4 *vt = (VP4x4 *)data;
91       assert(bsize == BLOCK_4X4);
92       node->part_variances = &vt->part_variances;
93       for (int split_idx = 0; split_idx < 4; split_idx++)
94         node->split[split_idx] = &vt->split[split_idx];
95       break;
96     }
97   }
98 }
99 
100 // Set variance values given sum square error, sum error, count.
fill_variance(uint32_t s2,int32_t s,int c,VPartVar * v)101 static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c,
102                                      VPartVar *v) {
103   v->sum_square_error = s2;
104   v->sum_error = s;
105   v->log2_count = c;
106 }
107 
get_variance(VPartVar * v)108 static AOM_INLINE void get_variance(VPartVar *v) {
109   v->variance =
110       (int)(256 * (v->sum_square_error -
111                    (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
112                               v->log2_count)) >>
113             v->log2_count);
114 }
115 
sum_2_variances(const VPartVar * a,const VPartVar * b,VPartVar * r)116 static AOM_INLINE void sum_2_variances(const VPartVar *a, const VPartVar *b,
117                                        VPartVar *r) {
118   assert(a->log2_count == b->log2_count);
119   fill_variance(a->sum_square_error + b->sum_square_error,
120                 a->sum_error + b->sum_error, a->log2_count + 1, r);
121 }
122 
fill_variance_tree(void * data,BLOCK_SIZE bsize)123 static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
124   variance_node node;
125   memset(&node, 0, sizeof(node));
126   tree_to_node(data, bsize, &node);
127   sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
128   sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
129   sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
130   sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
131   sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
132                   &node.part_variances->none);
133 }
134 
set_block_size(AV1_COMP * const cpi,int mi_row,int mi_col,BLOCK_SIZE bsize)135 static AOM_INLINE void set_block_size(AV1_COMP *const cpi, int mi_row,
136                                       int mi_col, BLOCK_SIZE bsize) {
137   if (cpi->common.mi_params.mi_cols > mi_col &&
138       cpi->common.mi_params.mi_rows > mi_row) {
139     CommonModeInfoParams *mi_params = &cpi->common.mi_params;
140     const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
141     const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col);
142     MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] =
143         &mi_params->mi_alloc[mi_alloc_idx];
144     mi->bsize = bsize;
145   }
146 }
147 
// Decide the partitioning for the block of size 'bsize' at (mi_row, mi_col)
// from the variance tree in 'data'. Returns 1 when a decision
// (none / vert / horz split) was committed via set_block_size(); returns 0
// when the caller should recurse with PARTITION_SPLIT.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               PART_EVAL_STATUS force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  int fit_width = block_width;
  int fit_height = block_height;
  int fit_width_vert = block_width >> 1;
  int fit_height_horz = block_height >> 1;
  // On the right and bottom frame boundary only half the bsize needs to
  // fit, because the boundary is extended up to 64 pixels. This relaxation
  // therefore applies only when sb_size is 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      fit_width = (block_width >> 1) + 1;
      fit_width_vert = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      fit_height = (block_height >> 1) + 1;
      fit_height_horz = (block_height >> 2) + 1;
    }
  }

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  // Forced PARTITION_NONE: take the full block when it fits in the tile.
  if (force_split == PART_EVAL_ONLY_NONE &&
      mi_col + fit_width <= tile->mi_col_end &&
      mi_row + fit_height <= tile->mi_row_end) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;

  if (bsize == bsize_min) {
    // At the minimum size (16x16/8x8 for 8x8/4x4 downsampling): take the
    // block if variance is below threshold, otherwise split. No vert/horz
    // check here - too few samples for a reliable variance.
    // For inter frames the variance was already computed to set force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + fit_width <= tile->mi_col_end &&
        mi_row + fit_height <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  }

  if (bsize > bsize_min) {
    if (frame_is_intra_only(cm)) {
      // Variance was not precomputed for intra frames.
      get_variance(&vt.part_variances->none);
      // Key frame: always split above 32X32 or on very high variance.
      if (bsize > BLOCK_32X32 ||
          vt.part_variances->none.variance > (threshold << 4)) {
        return 0;
      }
    }
    // Low variance: keep the whole bsize (no split).
    if (mi_col + fit_width <= tile->mi_col_end &&
        mi_row + fit_height <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    // Try a vertical split.
    if (mi_row + fit_height <= tile->mi_row_end &&
        mi_col + fit_width_vert <= tile->mi_col_end) {
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      const BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (plane_bsize < BLOCK_INVALID &&
          vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Try a horizontal split.
    if (mi_col + fit_width <= tile->mi_col_end &&
        mi_row + fit_height_horz <= tile->mi_row_end) {
      const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      const BLOCK_SIZE plane_bsize =
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
                               xd->plane[AOM_PLANE_U].subsampling_y);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (plane_bsize < BLOCK_INVALID &&
          vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold) {
        set_block_size(cpi, mi_row, mi_col, subsize);
        set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
  }
  return 0;
}
253 
all_blks_inside(int x16_idx,int y16_idx,int pixels_wide,int pixels_high)254 static AOM_INLINE int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
255                                       int pixels_high) {
256   int all_inside = 1;
257   for (int idx = 0; idx < 4; idx++) {
258     all_inside &= ((x16_idx + GET_BLK_IDX_X(idx, 3)) < pixels_wide);
259     all_inside &= ((y16_idx + GET_BLK_IDX_Y(idx, 3)) < pixels_high);
260   }
261   return all_inside;
262 }
263 
#if CONFIG_AV1_HIGHBITDEPTH
// TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd
// Fill the (sum, sse) of each 8x8 sub-block of a 16x16 block from the
// difference of the high-bitdepth source and reference 8x8 averages.
// Out-of-frame sub-blocks keep sum = sse = 0.
static AOM_INLINE void fill_variance_8x8avg_highbd(
    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
    int pixels_high) {
  for (int blk = 0; blk < 4; blk++) {
    const int x8_idx = x16_idx + GET_BLK_IDX_X(blk, 3);
    const int y8_idx = y16_idx + GET_BLK_IDX_Y(blk, 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      const int src_avg = aom_highbd_avg_8x8(
          src_buf + y8_idx * src_stride + x8_idx, src_stride);
      const int dst_avg = aom_highbd_avg_8x8(
          dst_buf + y8_idx * dst_stride + x8_idx, dst_stride);
      sum = src_avg - dst_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[blk].part_variances.none);
  }
}
#endif
288 
fill_variance_8x8avg_lowbd(const uint8_t * src_buf,int src_stride,const uint8_t * dst_buf,int dst_stride,int x16_idx,int y16_idx,VP16x16 * vst,int pixels_wide,int pixels_high)289 static AOM_INLINE void fill_variance_8x8avg_lowbd(
290     const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
291     int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
292     int pixels_high) {
293   unsigned int sse[4] = { 0 };
294   int sum[4] = { 0 };
295 
296   if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) {
297     int src_avg[4];
298     int dst_avg[4];
299     aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg);
300     aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg);
301     for (int idx = 0; idx < 4; idx++) {
302       sum[idx] = src_avg[idx] - dst_avg[idx];
303       sse[idx] = sum[idx] * sum[idx];
304     }
305   } else {
306     for (int idx = 0; idx < 4; idx++) {
307       const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
308       const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
309       if (x8_idx < pixels_wide && y8_idx < pixels_high) {
310         int src_avg =
311             aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride);
312         int dst_avg =
313             aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride);
314         sum[idx] = src_avg - dst_avg;
315         sse[idx] = sum[idx] * sum[idx];
316       }
317     }
318   }
319 
320   for (int idx = 0; idx < 4; idx++) {
321     fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none);
322   }
323 }
324 
325 // Obtain parameters required to calculate variance (such as sum, sse, etc,.)
326 // at 8x8 sub-block level for a given 16x16 block.
327 // The function can be called only when is_key_frame is false since sum is
328 // computed between source and reference frames.
fill_variance_8x8avg(const uint8_t * src_buf,int src_stride,const uint8_t * dst_buf,int dst_stride,int x16_idx,int y16_idx,VP16x16 * vst,int highbd_flag,int pixels_wide,int pixels_high)329 static AOM_INLINE void fill_variance_8x8avg(
330     const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
331     int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int highbd_flag,
332     int pixels_wide, int pixels_high) {
333 #if CONFIG_AV1_HIGHBITDEPTH
334   if (highbd_flag) {
335     fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride,
336                                 x16_idx, y16_idx, vst, pixels_wide,
337                                 pixels_high);
338     return;
339   }
340 #else
341   (void)highbd_flag;
342 #endif  // CONFIG_AV1_HIGHBITDEPTH
343   fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx,
344                              y16_idx, vst, pixels_wide, pixels_high);
345 }
346 
// Return the spread between the largest and smallest per-8x8 (max - min)
// source/reference pixel difference among the in-frame 8x8 sub-blocks of
// the 16x16 block at (x16_idx, y16_idx).
static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride,
                              const uint8_t *dst_buf, int dst_stride,
                              int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the four 8x8 sub-blocks, skipping those outside the frame.
  for (int blk = 0; blk < 4; blk++) {
    const int x8_idx = x16_idx + GET_BLK_IDX_X(blk, 3);
    const int y8_idx = y16_idx + GET_BLK_IDX_Y(blk, 3);
    if (x8_idx >= pixels_wide || y8_idx >= pixels_high) continue;
    int min = 0;
    int max = 0;
    const uint8_t *const src = src_buf + y8_idx * src_stride + x8_idx;
    const uint8_t *const dst = dst_buf + y8_idx * dst_stride + x8_idx;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src, src_stride, dst, dst_stride, &min, &max);
    } else {
      aom_minmax_8x8(src, src_stride, dst, dst_stride, &min, &max);
    }
#else
    aom_minmax_8x8(src, src_stride, dst, dst_stride, &min, &max);
#endif
    const int diff = max - min;
    if (diff > minmax_max) minmax_max = diff;
    if (diff < minmax_min) minmax_min = diff;
  }
  return (minmax_max - minmax_min);
}
384 
385 // Function to compute average and variance of 4x4 sub-block.
386 // The function can be called only when is_key_frame is true since sum is
387 // computed using source frame only.
fill_variance_4x4avg(const uint8_t * src_buf,int src_stride,int x8_idx,int y8_idx,VP8x8 * vst,int highbd_flag,int pixels_wide,int pixels_high,int border_offset_4x4)388 static AOM_INLINE void fill_variance_4x4avg(const uint8_t *src_buf,
389                                             int src_stride, int x8_idx,
390                                             int y8_idx, VP8x8 *vst,
391 #if CONFIG_AV1_HIGHBITDEPTH
392                                             int highbd_flag,
393 #endif
394                                             int pixels_wide, int pixels_high,
395                                             int border_offset_4x4) {
396   for (int idx = 0; idx < 4; idx++) {
397     const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2);
398     const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2);
399     unsigned int sse = 0;
400     int sum = 0;
401     if (x4_idx < pixels_wide - border_offset_4x4 &&
402         y4_idx < pixels_high - border_offset_4x4) {
403       int src_avg;
404       int dst_avg = 128;
405 #if CONFIG_AV1_HIGHBITDEPTH
406       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
407         src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx,
408                                      src_stride);
409       } else {
410         src_avg =
411             aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
412       }
413 #else
414       src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
415 #endif
416 
417       sum = src_avg - dst_avg;
418       sse = sum * sum;
419     }
420     fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
421   }
422 }
423 
// TODO(kyslov) Bring back threshold adjustment based on content state
// Scale the base partition threshold: non-reference frames get a 3/2
// increase, and speed >= 8 a further 5/4 on top of that.
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int non_reference_frame) {
  (void)width;
  (void)height;
  int64_t threshold =
      non_reference_frame ? (3 * threshold_base) >> 1 : threshold_base;
  if (speed >= 8) threshold = (5 * threshold) >> 2;
  return threshold;
}
437 
438 // Tune thresholds less or more aggressively to prefer larger partitions
tune_thresh_based_on_qindex(AV1_COMP * cpi,int64_t thresholds[],uint64_t block_sad,int current_qindex,int num_pixels,bool is_segment_id_boosted,int source_sad_nonrd,int lighting_change)439 static AOM_INLINE void tune_thresh_based_on_qindex(
440     AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex,
441     int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd,
442     int lighting_change) {
443   double weight;
444   if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
445     const int win = 20;
446     if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
447       weight = 1.0;
448     else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
449       weight = 0.0;
450     else
451       weight =
452           1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
453     if (num_pixels > RESOLUTION_480P) {
454       for (int i = 0; i < 4; i++) {
455         thresholds[i] <<= 1;
456       }
457     }
458     if (num_pixels <= RESOLUTION_288P) {
459       thresholds[3] = INT64_MAX;
460       if (is_segment_id_boosted == false) {
461         thresholds[1] <<= 2;
462         thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
463       } else {
464         thresholds[1] <<= 1;
465         thresholds[2] <<= 3;
466       }
467       // Allow for split to 8x8 for superblocks where part of it has
468       // moving boundary. So allow for sb with source_sad above threshold,
469       // and avoid very large source_sad or high source content, to avoid
470       // too many 8x8 within superblock.
471       uint64_t avg_source_sad_thresh = 25000;
472       uint64_t block_sad_low = 25000;
473       uint64_t block_sad_high = 50000;
474       if (cpi->svc.temporal_layer_id == 0 &&
475           cpi->svc.number_temporal_layers > 1) {
476         // Increase the sad thresholds for base TL0, as reference/LAST is
477         // 2/4 frames behind (for 2/3 #TL).
478         avg_source_sad_thresh = 40000;
479         block_sad_high = 70000;
480       }
481       if (is_segment_id_boosted == false &&
482           cpi->rc.avg_source_sad < avg_source_sad_thresh &&
483           block_sad > block_sad_low && block_sad < block_sad_high &&
484           !lighting_change) {
485         thresholds[2] = (3 * thresholds[2]) >> 2;
486         thresholds[3] = thresholds[2] << 3;
487       }
488       // Condition the increase of partition thresholds on the segment
489       // and the content. Avoid the increase for superblocks which have
490       // high source sad, unless the whole frame has very high motion
491       // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
492       // have high source sad).
493     } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false &&
494                (source_sad_nonrd != kHighSad ||
495                 cpi->rc.avg_source_sad > 50000)) {
496       thresholds[0] = (3 * thresholds[0]) >> 1;
497       thresholds[3] = INT64_MAX;
498       if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
499         thresholds[1] =
500             (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
501         thresholds[2] =
502             (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
503       }
504     } else if (current_qindex > QINDEX_LARGE_BLOCK_THR &&
505                is_segment_id_boosted == false &&
506                (source_sad_nonrd != kHighSad ||
507                 cpi->rc.avg_source_sad > 50000)) {
508       thresholds[1] =
509           (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
510       thresholds[2] =
511           (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
512       thresholds[3] = INT64_MAX;
513     }
514   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
515     thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
516     thresholds[2] =
517         (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
518   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
519     const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
520     if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45)
521       weight = 1.0;
522     else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45)
523       weight = 0.0;
524     else
525       weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45);
526     thresholds[1] =
527         (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
528     thresholds[2] =
529         (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
530     thresholds[3] =
531         (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
532   }
533   if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
534     thresholds[3] = INT64_MAX;
535 }
536 
// Populate 'thresholds' for key-frame (intra-only) variance partitioning.
static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[],
                                         int64_t threshold_base,
                                         int threshold_left_shift,
                                         int num_pixels) {
  if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
    // Raise the base to favor larger intra partitions; the shift is the
    // configured split shift reduced by 7 (all-intra) or 8 steps.
    const int shift_steps =
        threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8);
    assert(shift_steps >= 0);
    threshold_base <<= shift_steps;
  }
  thresholds[0] = threshold_base;
  thresholds[1] = threshold_base;
  if (num_pixels < RESOLUTION_720P) {
    thresholds[2] = threshold_base / 3;
    thresholds[3] = threshold_base >> 1;
  } else {
    const int shift_val =
        cpi->sf.rt_sf.force_large_partition_blocks_intra ? 0 : 2;
    thresholds[2] = threshold_base >> shift_val;
    thresholds[3] = threshold_base >> shift_val;
  }
  thresholds[4] = threshold_base << 2;
}
563 
tune_thresh_based_on_resolution(AV1_COMP * cpi,int64_t thresholds[],int64_t threshold_base,int current_qindex,int source_sad_rd,int num_pixels)564 static AOM_INLINE void tune_thresh_based_on_resolution(
565     AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base,
566     int current_qindex, int source_sad_rd, int num_pixels) {
567   if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1;
568   if (num_pixels <= RESOLUTION_288P) {
569     const int qindex_thr[5][2] = {
570       { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
571     };
572     int th_idx = 0;
573     if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
574       th_idx =
575           (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
576     if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
577       th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
578     const int qindex_low_thr = qindex_thr[th_idx][0];
579     const int qindex_high_thr = qindex_thr[th_idx][1];
580     if (current_qindex >= qindex_high_thr) {
581       threshold_base = (5 * threshold_base) >> 1;
582       thresholds[1] = threshold_base >> 3;
583       thresholds[2] = threshold_base << 2;
584       thresholds[3] = threshold_base << 5;
585     } else if (current_qindex < qindex_low_thr) {
586       thresholds[1] = threshold_base >> 3;
587       thresholds[2] = threshold_base >> 1;
588       thresholds[3] = threshold_base << 3;
589     } else {
590       int64_t qi_diff_low = current_qindex - qindex_low_thr;
591       int64_t qi_diff_high = qindex_high_thr - current_qindex;
592       int64_t threshold_diff = qindex_high_thr - qindex_low_thr;
593       int64_t threshold_base_high = (5 * threshold_base) >> 1;
594 
595       threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
596       threshold_base =
597           (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
598           threshold_diff;
599       thresholds[1] = threshold_base >> 3;
600       thresholds[2] = ((qi_diff_low * threshold_base) +
601                        qi_diff_high * (threshold_base >> 1)) /
602                       threshold_diff;
603       thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
604                        qi_diff_high * (threshold_base << 3)) /
605                       threshold_diff;
606     }
607   } else if (num_pixels < RESOLUTION_720P) {
608     thresholds[2] = (5 * threshold_base) >> 2;
609   } else if (num_pixels < RESOLUTION_1080P) {
610     thresholds[2] = threshold_base << 1;
611   } else {
612     // num_pixels >= RESOLUTION_1080P
613     if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
614       if (num_pixels < RESOLUTION_1440P) {
615         thresholds[2] = (5 * threshold_base) >> 1;
616       } else {
617         thresholds[2] = (7 * threshold_base) >> 1;
618       }
619     } else {
620       if (cpi->oxcf.speed > 7) {
621         thresholds[2] = 6 * threshold_base;
622       } else {
623         thresholds[2] = 3 * threshold_base;
624       }
625     }
626   }
627 }
628 
629 // Increase partition thresholds for noisy content. Apply it only for
630 // superblocks where sumdiff is low, as we assume the sumdiff of superblock
631 // whose only change is due to noise will be low (i.e, noise will average
632 // out over large block).
tune_thresh_noisy_content(AV1_COMP * cpi,int64_t threshold_base,int content_lowsumdiff,int num_pixels)633 static AOM_INLINE int64_t tune_thresh_noisy_content(AV1_COMP *cpi,
634                                                     int64_t threshold_base,
635                                                     int content_lowsumdiff,
636                                                     int num_pixels) {
637   AV1_COMMON *const cm = &cpi->common;
638   int64_t updated_thresh_base = threshold_base;
639   if (cpi->noise_estimate.enabled && content_lowsumdiff &&
640       num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) {
641     NOISE_LEVEL noise_level =
642         av1_noise_estimate_extract_level(&cpi->noise_estimate);
643     if (noise_level == kHigh)
644       updated_thresh_base = (5 * updated_thresh_base) >> 1;
645     else if (noise_level == kMedium &&
646              !cpi->sf.rt_sf.prefer_large_partition_blocks)
647       updated_thresh_base = (5 * updated_thresh_base) >> 2;
648   }
649   // TODO(kyslov) Enable var based partition adjusment on temporal denoising
650 #if 0  // CONFIG_AV1_TEMPORAL_DENOISING
651   if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
652       cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
653       updated_thresh_base =
654           av1_scale_part_thresh(updated_thresh_base, cpi->denoiser.denoising_level,
655                                 content_state, cpi->svc.temporal_layer_id);
656   else
657     threshold_base =
658         scale_part_thresh_content(updated_thresh_base, cpi->oxcf.speed, cm->width,
659                                   cm->height, cpi->ppi->rtc_ref.non_reference_frame);
660 #else
661   // Increase base variance threshold based on content_state/sum_diff level.
662   updated_thresh_base = scale_part_thresh_content(
663       updated_thresh_base, cpi->oxcf.speed, cm->width, cm->height,
664       cpi->ppi->rtc_ref.non_reference_frame);
665 #endif
666   return updated_thresh_base;
667 }
668 
// Computes the variance-based partitioning thresholds for the current
// frame/superblock and stores them in thresholds[].
// The base threshold scales with the AC quantizer at `qindex`; key frames use
// a large fixed multiplier and take a dedicated path. For inter frames the
// base is further tuned for noisy content, resolution, and qindex/SAD state.
// NOTE(review): thresholds[2] is not written here; presumably it is filled in
// by tune_thresh_based_on_resolution() — confirm against that helper.
static AOM_INLINE void set_vbp_thresholds(
    AV1_COMP *cpi, int64_t thresholds[], uint64_t blk_sad, int qindex,
    int content_lowsumdiff, int source_sad_nonrd, int source_sad_rd,
    bool is_segment_id_boosted, int lighting_change) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  // Key frames use a much larger base threshold (120x) than inter frames.
  const int threshold_multiplier = is_key_frame ? 120 : 1;
  const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth);
  int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q);
  const int current_qindex = cm->quant_params.base_qindex;
  const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;
  const int num_pixels = cm->width * cm->height;

  if (is_key_frame) {
    // Key frames have their own threshold derivation; nothing else applies.
    set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base,
                                 threshold_left_shift, num_pixels);
    return;
  }

  // Inter frame: raise the base threshold for noisy/low-sum-diff content,
  // then derive the per-level thresholds from it.
  threshold_base = tune_thresh_noisy_content(cpi, threshold_base,
                                             content_lowsumdiff, num_pixels);
  thresholds[0] = threshold_base >> 1;
  thresholds[1] = threshold_base;
  thresholds[3] = threshold_base << threshold_left_shift;

  tune_thresh_based_on_resolution(cpi, thresholds, threshold_base,
                                  current_qindex, source_sad_rd, num_pixels);

  tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex,
                              num_pixels, is_segment_id_boosted,
                              source_sad_nonrd, lighting_change);
}
701 
// Set temporal variance low flag for superblock 64x64.
// Only first 25 in the array are used in this case.
static AOM_INLINE void set_low_temp_var_flag_64x64(
    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
    MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col,
    int mi_row) {
  if (xd->mi[0]->bsize == BLOCK_64X64) {
    // Whole superblock coded as a single 64x64: flag index 0.
    if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
      part_info->variance_low[0] = 1;
  } else if (xd->mi[0]->bsize == BLOCK_64X32) {
    // Two horizontal halves: flag indices 1..2.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 1] = 1;
    }
  } else if (xd->mi[0]->bsize == BLOCK_32X64) {
    // Two vertical halves: flag indices 3..4.
    for (int part_idx = 0; part_idx < 2; part_idx++) {
      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
        part_info->variance_low[part_idx + 3] = 1;
    }
  } else {
    // Superblock was split: visit the four 32x32 quadrants. idx[] holds the
    // (row, col) mi offsets of each quadrant (8 mi units == 32 pixels).
    static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) +
                          mi_col + idx[lvl1_idx][1];
      MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;

      // Skip quadrants lying outside the frame.
      if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] ||
          mi_params->mi_rows <= mi_row + idx[lvl1_idx][0])
        continue;

      if (*this_mi == NULL) continue;

      if ((*this_mi)->bsize == BLOCK_32X32) {
        // 32x32 flags occupy indices 5..8; threshold is 5/8 of thresholds[1].
        int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32)
          part_info->variance_low[lvl1_idx + 5] = 1;
      } else {
        // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
        // inside. 16x16 flags occupy indices 9..24.
        if ((*this_mi)->bsize == BLOCK_16X16 ||
            (*this_mi)->bsize == BLOCK_32X16 ||
            (*this_mi)->bsize == BLOCK_16X32) {
          for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
            if (vt->split[lvl1_idx]
                    .split[lvl2_idx]
                    .part_variances.none.variance < (thresholds[2] >> 8))
              part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1;
          }
        }
      }
    }
  }
}
755 
set_low_temp_var_flag_128x128(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,const int64_t thresholds[],int mi_col,int mi_row)756 static AOM_INLINE void set_low_temp_var_flag_128x128(
757     CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
758     MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
759     int mi_row) {
760   if (xd->mi[0]->bsize == BLOCK_128X128) {
761     if (vt->part_variances.none.variance < (thresholds[0] >> 1))
762       part_info->variance_low[0] = 1;
763   } else if (xd->mi[0]->bsize == BLOCK_128X64) {
764     for (int part_idx = 0; part_idx < 2; part_idx++) {
765       if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
766         part_info->variance_low[part_idx + 1] = 1;
767     }
768   } else if (xd->mi[0]->bsize == BLOCK_64X128) {
769     for (int part_idx = 0; part_idx < 2; part_idx++) {
770       if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
771         part_info->variance_low[part_idx + 3] = 1;
772     }
773   } else {
774     static const int idx64[4][2] = {
775       { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
776     };
777     static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
778     for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
779       const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) +
780                           mi_col + idx64[lvl1_idx][1];
781       MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
782       if (*mi_64 == NULL) continue;
783       if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] ||
784           mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0])
785         continue;
786       const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
787       if ((*mi_64)->bsize == BLOCK_64X64) {
788         if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64)
789           part_info->variance_low[5 + lvl1_idx] = 1;
790       } else if ((*mi_64)->bsize == BLOCK_64X32) {
791         for (int part_idx = 0; part_idx < 2; part_idx++)
792           if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance <
793               (threshold_64x64 >> 1))
794             part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1;
795       } else if ((*mi_64)->bsize == BLOCK_32X64) {
796         for (int part_idx = 0; part_idx < 2; part_idx++)
797           if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance <
798               (threshold_64x64 >> 1))
799             part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1;
800       } else {
801         for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
802           const int idx_str1 =
803               mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1];
804           MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
805           if (*mi_32 == NULL) continue;
806 
807           if (mi_params->mi_cols <=
808                   mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] ||
809               mi_params->mi_rows <=
810                   mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0])
811             continue;
812           const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
813           if ((*mi_32)->bsize == BLOCK_32X32) {
814             if (vt->split[lvl1_idx]
815                     .split[lvl2_idx]
816                     .part_variances.none.variance < threshold_32x32)
817               part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1;
818           } else {
819             // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
820             // inside.
821             if ((*mi_32)->bsize == BLOCK_16X16 ||
822                 (*mi_32)->bsize == BLOCK_32X16 ||
823                 (*mi_32)->bsize == BLOCK_16X32) {
824               for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
825                 VPartVar *none_var = &vt->split[lvl1_idx]
826                                           .split[lvl2_idx]
827                                           .split[lvl3_idx]
828                                           .part_variances.none;
829                 if (none_var->variance < (thresholds[3] >> 8))
830                   part_info->variance_low[41 + (lvl1_idx << 4) +
831                                           (lvl2_idx << 2) + lvl3_idx] = 1;
832               }
833             }
834           }
835         }
836       }
837     }
838   }
839 }
840 
set_low_temp_var_flag(AV1_COMP * cpi,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,int64_t thresholds[],MV_REFERENCE_FRAME ref_frame_partition,int mi_col,int mi_row,const bool is_small_sb)841 static AOM_INLINE void set_low_temp_var_flag(
842     AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
843     VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
844     int mi_col, int mi_row, const bool is_small_sb) {
845   AV1_COMMON *const cm = &cpi->common;
846   // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected.
847   // If the temporal variance is small set the flag
848   // variance_low for the block. The variance threshold can be adjusted, the
849   // higher the more aggressive.
850   if (ref_frame_partition == LAST_FRAME) {
851     if (is_small_sb)
852       set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
853                                   &(vt->split[0]), thresholds, mi_col, mi_row);
854     else
855       set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
856                                     thresholds, mi_col, mi_row);
857   }
858 }
859 
// Maps the (row, col) position of a 16x16 block inside a 64x64 superblock to
// its index in the variance_low[] array (see
// av1_get_force_skip_low_temp_var_small_sb).
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
863 
// Returns the low-temporal-variance flag for a block inside a 64x64
// superblock, looked up in variance_low[] (indices 0..24) by block size and
// position.
int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
                                             int mi_row, int mi_col,
                                             BLOCK_SIZE bsize) {
  // Relative indices of MB inside the superblock.
  const int mi_x = mi_row & 0xF;
  const int mi_y = mi_col & 0xF;
  // Relative indices of 16x16 block inside the superblock.
  const int i = mi_x >> 2;
  const int j = mi_y >> 2;
  int force_skip_low_temp_var = 0;
  // Select the flag index from block size and position inside the superblock.
  switch (bsize) {
    case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_64X32:
      // Top half -> index 1, bottom half -> index 2.
      if (mi_y == 0) force_skip_low_temp_var = variance_low[1 + (mi_x != 0)];
      break;
    case BLOCK_32X64:
      // Left half -> index 3, right half -> index 4.
      if (mi_x == 0) force_skip_low_temp_var = variance_low[3 + (mi_y != 0)];
      break;
    case BLOCK_32X32:
      // Quadrants map to indices 5..8 (row-major over (mi_x, mi_y)).
      force_skip_low_temp_var =
          variance_low[5 + ((mi_x != 0) << 1) + (mi_y != 0)];
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
      break;
    default: break;
  }

  return force_skip_low_temp_var;
}
912 
// Returns the low-temporal-variance flag for a block inside a 128x128
// superblock, looked up in variance_low[] by block size and position.
// Index layout: 0 = 128x128, 1..2 = 128x64, 3..4 = 64x128, 5..8 = 64x64,
// 9..16 = 64x32, 17..24 = 32x64, 25..40 = 32x32, 41+ = 16x16 granularity.
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  int force_skip_low_temp_var = 0;
  int x, y;
  // Position of the 64x64 block inside the 128x128 superblock.
  x = (mi_col & 0x1F) >> 4;
  // y = (mi_row & 0x1F) >> 4;
  // const int idx64 = (y << 1) + x;
  // NOTE(review): mask 0x17 folds the (y << 1) of the commented-out form into
  // one shift; equivalent only when mi_row is suitably aligned — confirm.
  y = (mi_row & 0x17) >> 3;
  const int idx64 = y + x;

  // Position of the 32x32 block inside its 64x64 block (same folding trick).
  x = (mi_col & 0xF) >> 3;
  // y = (mi_row & 0xF) >> 3;
  // const int idx32 = (y << 1) + x;
  y = (mi_row & 0xB) >> 2;
  const int idx32 = y + x;

  // Position of the 16x16 block inside its 32x32 block (same folding trick).
  x = (mi_col & 0x7) >> 2;
  // y = (mi_row & 0x7) >> 2;
  // const int idx16 = (y << 1) + x;
  y = (mi_row & 0x5) >> 1;
  const int idx16 = y + x;
  // Set force_skip_low_temp_var based on the block size and block offset.
  switch (bsize) {
    case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_128X64:
      assert((mi_col & 0x1F) == 0);
      // Top half -> 1, bottom half -> 2.
      force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
      break;
    case BLOCK_64X128:
      assert((mi_row & 0x1F) == 0);
      // Left half -> 3, right half -> 4.
      force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
      break;
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock
      force_skip_low_temp_var = variance_low[5 + idx64];
      break;
    case BLOCK_64X32:
      x = (mi_col & 0x1F) >> 4;
      y = (mi_row & 0x1F) >> 3;
      /*
      .---------------.---------------.
      | x=0,y=0,idx=0 | x=0,y=0,idx=2 |
      :---------------+---------------:
      | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
      :---------------+---------------:
      | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
      :---------------+---------------:
      | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
      '---------------'---------------'
      */
      const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
      force_skip_low_temp_var = variance_low[9 + idx64x32];
      break;
    case BLOCK_32X64:
      x = (mi_col & 0x1F) >> 3;
      y = (mi_row & 0x1F) >> 4;
      const int idx32x64 = (y << 2) + x;
      force_skip_low_temp_var = variance_low[17 + idx32x64];
      break;
    case BLOCK_32X32:
      force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      force_skip_low_temp_var =
          variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
      break;
    default: break;
  }
  return force_skip_low_temp_var;
}
985 
// Recomputes the variance-partition thresholds for the given qindex.
// No-op unless variance-based partitioning is the active search type.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex,
                                           int content_lowsumdiff) {
  SPEED_FEATURES *const sf = &cpi->sf;
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) return;

  set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex,
                     content_lowsumdiff, 0, 0, 0, 0);
  // The threshold below is not changed locally.
  cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3);
}
998 
// Computes chroma (U/V) SADs against the last/golden/altref references and
// sets the per-superblock color sensitivity flags on x (used later by nonrd
// pickmode). uv_sad[] receives the per-plane SAD vs. the last-frame
// reference. Returns early (no flags touched) on key frames or monochrome.
static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, unsigned int y_sad,
                                    unsigned int y_sad_g,
                                    unsigned int y_sad_alt, bool is_key_frame,
                                    bool zero_motion, unsigned int *uv_sad) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // Chroma SAD is compared against y_sad >> shift_upper_limit (flag on) and
  // y_sad >> shift_lower_limit (flag off); in between is "borderline" (2).
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  int fac_uv = 6;
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
    if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P)
      fac_uv = 3;
    else
      fac_uv = 5;
  }
  // Screen content with a scene change, or medium/high motion with enough
  // source variance, widens the borderline band.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  struct buf_2d dst;
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last:
      if (zero_motion) {
        if (mi->ref_frame[0] == LAST_FRAME) {
          // Prediction buffers already point at LAST; SAD vs. pre[0] directly.
          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
        } else {
          // Set up the LAST-frame chroma plane at this mi position first.
          uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer;
          setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
                           yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                           xd->mi_col, sf, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);

          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, dst.buf, dst.stride);
        }
      } else {
        // Non-zero motion: compare against the reconstructed dst buffer.
        uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
            p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride);
      }

      // For golden: (y_sad_g == UINT_MAX signals golden was not evaluated)
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf, xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref: (y_sad_alt == UINT_MAX signals altref was not evaluated)
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf,
                         xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    // Flag values: 1 = color sensitive, 0 = not, 2 = borderline (refined
    // later at coding-block level).
    if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1;
    else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0;
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
    // for coding block size < sb_size.
    else
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}
1108 
// Fills the 16x16 leaves of the variance tree vt from the source (and, for
// inter frames, the prediction in dst_buf), and seeds the force_split[]
// decisions: a 16x16 block whose variance exceeds thresholds[3] (or whose
// 8x8 minmax exceeds threshold_minmax) forces PARTITION_SPLIT at its own
// level and at all enclosing levels.
// force_split[] indexing: 0 = superblock, 1..4 = 64x64, 5..20 = 32x32,
// 21.. = 16x16.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clip the active area for superblocks extending past the frame edge
  // (mb_to_*_edge is in 1/8-pel units, hence >> 3).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      // Track average/min/max 16x16 variance per 32x32 for later decisions.
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          // Inter frame: source vs. prediction variance at 8x8 granularity.
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}
1222 
set_ref_frame_for_partition(AV1_COMP * cpi,MACROBLOCK * x,MACROBLOCKD * xd,MV_REFERENCE_FRAME * ref_frame_partition,MB_MODE_INFO * mi,unsigned int * y_sad,unsigned int * y_sad_g,unsigned int * y_sad_alt,const YV12_BUFFER_CONFIG * yv12_g,const YV12_BUFFER_CONFIG * yv12_alt,int mi_row,int mi_col,int num_planes)1223 static AOM_INLINE void set_ref_frame_for_partition(
1224     AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
1225     MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi,
1226     unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt,
1227     const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt,
1228     int mi_row, int mi_col, int num_planes) {
1229   AV1_COMMON *const cm = &cpi->common;
1230   const bool is_set_golden_ref_frame =
1231       *y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt;
1232   const bool is_set_altref_ref_frame =
1233       *y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g;
1234 
1235   if (is_set_golden_ref_frame) {
1236     av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1237                          get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
1238     mi->ref_frame[0] = GOLDEN_FRAME;
1239     mi->mv[0].as_int = 0;
1240     *y_sad = *y_sad_g;
1241     *ref_frame_partition = GOLDEN_FRAME;
1242     x->nonrd_prune_ref_frame_search = 0;
1243     x->sb_me_partition = 0;
1244   } else if (is_set_altref_ref_frame) {
1245     av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
1246                          get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
1247     mi->ref_frame[0] = ALTREF_FRAME;
1248     mi->mv[0].as_int = 0;
1249     *y_sad = *y_sad_alt;
1250     *ref_frame_partition = ALTREF_FRAME;
1251     x->nonrd_prune_ref_frame_search = 0;
1252     x->sb_me_partition = 0;
1253   } else {
1254     *ref_frame_partition = LAST_FRAME;
1255     x->nonrd_prune_ref_frame_search =
1256         cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1257   }
1258 }
1259 
mv_distance(const FULLPEL_MV * mv0,const FULLPEL_MV * mv1)1260 static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
1261                                         const FULLPEL_MV *mv1) {
1262   return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
1263 }
1264 
// Tries the clamped MVs of the above and left neighbours (when they are
// inter blocks referencing LAST_FRAME) as alternatives to the current best
// full-pel MV; adopts a neighbour MV and updates *y_sad when its SAD is
// sufficiently lower (scaled by multi/8). Skipped for high-motion content at
// higher estimation speeds.
static AOM_INLINE void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                              unsigned int *y_sad,
                                              bool is_small_sb,
                                              int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO(yunqingwang@google.com): test if this condition works with other
  // speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];

  unsigned int above_y_sad = UINT_MAX;
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  FULLPEL_MV left_mv = kZeroFullMv;
  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
  // Acceptance factor: a neighbour MV must beat multi/8 of the current SAD
  // (stricter 7/8 for faster est_motion with some motion).
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;

  if (xd->up_available) {
    const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
    // Only consider inter neighbours predicting from LAST_FRAME.
    if (above_mbmi->mode >= INTRA_MODE_END &&
        above_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = above_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      above_mv = get_fullmv_from_mv(&temp);

      // Skip the SAD if the neighbour MV equals the current best.
      if (mv_distance(&best_mv, &above_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
        above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }
  if (xd->left_available) {
    const MB_MODE_INFO *left_mbmi = xd->left_mbmi;
    if (left_mbmi->mode >= INTRA_MODE_END &&
        left_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = left_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      left_mv = get_fullmv_from_mv(&temp);

      // Skip if equal to the best MV or to the already-tried above MV.
      if (mv_distance(&best_mv, &left_mv) > 0 &&
          mv_distance(&above_mv, &left_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
        left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }

  // Adopt whichever neighbour MV (if any) gives the sufficiently-lower SAD.
  if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
  if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}
1337 
// Sets up the Y-plane prediction buffers used by variance-based partitioning
// and computes whole-superblock Y SADs against candidate references.
//
// For a single spatial layer, GOLDEN and ALTREF may additionally be evaluated
// as temporal references; LAST is always set up last (when used) so that
// xd->plane[0].pre[0] points at the LAST reference afterwards. When the
// estimate_motion_for_var_based_partition speed feature is active, a coarse
// int-pro motion search (and optionally neighbour-MV evaluation) may replace
// the zero-MV SAD and set mi->mv[0]. Finally the reference frame used for
// partitioning is selected and, for a non-zero MV, the inter predictor for
// the superblock is built.
//
// Outputs:
//   *y_sad               - Y SAD for the motion/reference finally chosen.
//   *y_sad_g, *y_sad_alt - Y SAD vs GOLDEN / ALTREF (UINT_MAX if unevaluated).
//   *y_sad_last          - Y SAD vs LAST (only written when LAST is used).
//   *ref_frame_partition - reference frame selected for partitioning.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition,
                         struct scale_factors *sf_no_scale, int mi_row,
                         int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  // Caller has already determined (via scaled_ref_last) whether LAST needs
  // the pre-scaled buffer or the raw reference buffer.
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // For 1 spatial layer: GOLDEN is another temporal reference.
  // Check if it should be used as reference for partitioning.
  if (cpi->svc.number_spatial_layers == 1 && use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // Resolution mismatch: fall back to the scaled GOLDEN reference and use
    // NULL scale factors below (buffer is already at frame resolution).
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    if (yv12_g && yv12_g != yv12) {
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // For 1 spatial layer: ALTREF is another temporal reference.
  // Check if it should be used as reference for partitioning.
  if (cpi->svc.number_spatial_layers == 1 && use_alt_ref &&
      (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    // Same resolution-mismatch handling as for GOLDEN above.
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    if (yv12_alt && yv12_alt != yv12) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    // Set up LAST after GOLDEN/ALTREF so pre[0] refers to LAST from here on.
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    if (est_motion == 1 || est_motion == 2) {
      // Only search when the superblock is fully inside the frame.
      if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
        // For screen only do int_pro_motion for spatial variance above
        // threshold and motion level above LowSad.
        if (x->source_variance > 100 && source_sad_nonrd > kLowSad) {
          int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
          int me_search_size_col =
              is_screen ? 96 : block_size_wide[cm->seq_params->sb_size] >> 1;
          // For screen use larger search size row motion to capture
          // vertical scroll, which can be larger motion.
          int me_search_size_row =
              is_screen ? 192 : block_size_high[cm->seq_params->sb_size] >> 1;
          unsigned int y_sad_zero;
          *y_sad = av1_int_pro_motion_estimation(
              cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv,
              &y_sad_zero, me_search_size_col, me_search_size_row);
          // The logic below selects whether the motion estimated in the
          // int_pro_motion() will be used in nonrd_pickmode. Only do this
          // for screen for now.
          if (is_screen) {
            unsigned int thresh_sad =
                (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
            if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
              x->sb_me_partition = 1;
              x->sb_me_mv.as_int = mi->mv[0].as_int;
            } else {
              x->sb_me_partition = 0;
              // Fall back to using zero motion.
              *y_sad = y_sad_zero;
              mi->mv[0].as_int = 0;
            }
          }
        }
      }
    }

    // No motion search was run (or it didn't write *y_sad): compute the SAD
    // for the current mi->mv (zero MV at this point unless set above).
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      // Scaled reference is already at frame resolution: use identity scale
      // factors provided by the caller.
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
1502 
1503 // Decides whether to split or merge a 16x16 partition block in variance based
1504 // partitioning based on the 8x8 sub-block variances.
get_part_eval_based_on_sub_blk_var(VP16x16 * var_16x16_info,int64_t threshold16)1505 static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
1506     VP16x16 *var_16x16_info, int64_t threshold16) {
1507   int max_8x8_var = 0, min_8x8_var = INT_MAX;
1508   for (int split_idx = 0; split_idx < 4; split_idx++) {
1509     get_variance(&var_16x16_info->split[split_idx].part_variances.none);
1510     int this_8x8_var =
1511         var_16x16_info->split[split_idx].part_variances.none.variance;
1512     max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
1513     min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
1514   }
1515   // If the difference between maximum and minimum sub-block variances is high,
1516   // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
1517   // only PARTITION_NONE. The shift factor for threshold16 has been derived
1518   // empirically.
1519   return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
1520              ? PART_EVAL_ONLY_SPLIT
1521              : PART_EVAL_ONLY_NONE;
1522 }
1523 
is_set_force_zeromv_skip_based_on_src_sad(int set_zeromv_skip_based_on_source_sad,SOURCE_SAD source_sad_nonrd)1524 static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad(
1525     int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
1526   if (set_zeromv_skip_based_on_source_sad == 0) return false;
1527 
1528   if (set_zeromv_skip_based_on_source_sad >= 3)
1529     return source_sad_nonrd <= kLowSad;
1530   else if (set_zeromv_skip_based_on_source_sad >= 2)
1531     return source_sad_nonrd <= kVeryLowSad;
1532   else if (set_zeromv_skip_based_on_source_sad >= 1)
1533     return source_sad_nonrd == kZeroSad;
1534 
1535   return false;
1536 }
1537 
set_force_zeromv_skip_for_sb(AV1_COMP * cpi,MACROBLOCK * x,const TileInfo * const tile,VP128x128 * vt,unsigned int * uv_sad,int mi_row,int mi_col,unsigned int y_sad,BLOCK_SIZE bsize)1538 static AOM_INLINE bool set_force_zeromv_skip_for_sb(
1539     AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
1540     unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
1541     BLOCK_SIZE bsize) {
1542   AV1_COMMON *const cm = &cpi->common;
1543   if (!is_set_force_zeromv_skip_based_on_src_sad(
1544           cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
1545           x->content_state_sb.source_sad_nonrd))
1546     return false;
1547   int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
1548   const int block_width = mi_size_wide[cm->seq_params->sb_size];
1549   const int block_height = mi_size_high[cm->seq_params->sb_size];
1550   const unsigned int thresh_exit_part_y =
1551       cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
1552   unsigned int thresh_exit_part_uv =
1553       CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
1554   // Be more aggressive in UV threshold if source_sad >= VeryLowSad
1555   // to suppreess visual artifact caused by the speed feature:
1556   // set_zeromv_skip_based_on_source_sad = 2. For now only for
1557   // part_early_exit_zeromv = 1.
1558   if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
1559       cpi->sf.rt_sf.part_early_exit_zeromv == 1)
1560     thresh_exit_part_uv = thresh_exit_part_uv >> 3;
1561   if (mi_col + block_width <= tile->mi_col_end &&
1562       mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
1563       uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
1564     set_block_size(cpi, mi_row, mi_col, bsize);
1565     x->force_zeromv_skip_for_sb = 1;
1566     aom_free(vt);
1567     // Partition shape is set here at SB level.
1568     // Exit needs to happen from av1_choose_var_based_partitioning().
1569     return true;
1570   } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
1571              cpi->sf.rt_sf.part_early_exit_zeromv >= 2)
1572     x->force_zeromv_skip_for_sb = 2;
1573   return false;
1574 }
1575 
// Chooses the superblock partitioning using a variance-based scheme: builds
// a variance tree (128x128 -> 64x64 -> 32x32 -> 16x16 -> leaves) from the
// source vs. a reference prediction (or intra for key frames), forces splits
// where sub-block variance is above adaptive thresholds, and writes the
// resulting block sizes via set_vt_partitioning()/set_block_size().
// Always returns 0. The variance tree |vt| is heap-allocated here and freed
// before return (on the early zero-MV-skip path it is freed inside
// set_force_zeromv_skip_for_sb()).
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, choose_var_based_partitioning_time);
#endif
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
  // force_split index layout: 0 = 128x128 root, 1-4 = 64x64 blocks,
  // 5-20 = 32x32 blocks, 21-84 = 16x16 blocks.
  PART_EVAL_STATUS force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  const uint8_t *src_buf;
  const uint8_t *dst_buf;
  int dst_stride;
  unsigned int uv_sad[MAX_MB_PLANE - 1];
  NOISE_LEVEL noise_level = kLow;
  bool is_zero_motion = true;
  bool scaled_ref_last = false;
  // Identity scale factors, used when a pre-scaled LAST reference is in play.
  struct scale_factors sf_no_scale;
  av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
                                    cm->width, cm->height);

  bool is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  unsigned int y_sad_alt = UINT_MAX;
  unsigned int y_sad_last = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  // Local copy: set_vbp_thresholds() below adapts these per superblock.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int segment_id = xd->mi[0]->segment_id;
  uint64_t blk_sad = 0;
  if (cpi->src_sad_blk_64x64 != NULL &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
                                  ? (cm->seq_params->mib_size >> 1)
                                  : cm->seq_params->mib_size;
    const int sb_cols =
        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
    const int sbi_col = mi_col / sb_size_by_mb;
    const int sbi_row = mi_row / sb_size_by_mb;
    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
  }

  const bool is_segment_id_boosted =
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id);
  const int qindex =
      is_segment_id_boosted
          ? av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex)
          : cm->quant_params.base_qindex;
  set_vbp_thresholds(
      cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
      x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
      is_segment_id_boosted, x->content_state_sb.lighting_change);

  src_buf = x->plane[AOM_PLANE_Y].src.buf;
  int src_stride = x->plane[AOM_PLANE_Y].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = PART_EVAL_ALL;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // If LAST frame has a different resolution: set the scaled_ref_last flag
  // and check if ref_scaled is NULL.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL) {
      is_key_frame = true;
    } else if (ref->y_crop_height != cm->height ||
               ref->y_crop_width != cm->width) {
      scaled_ref_last = true;
      const YV12_BUFFER_CONFIG *ref_scaled =
          av1_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (ref_scaled == NULL) is_key_frame = true;
    }
  }

  x->source_variance = UINT_MAX;
  // For nonrd_pickmode: compute source_variance, only for superblocks with
  // some motion for now. This input can then be used to bias the partitioning
  // or the chroma_check.
  if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
      x->content_state_sb.source_sad_nonrd > kLowSad)
    x->source_variance = av1_get_perpixel_variance_facade(
        cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);

  if (!is_key_frame) {
    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
                 &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
                 is_small_sb, scaled_ref_last);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
      dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
      is_zero_motion = false;
    } else {
      dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
      dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
    }
  } else {
    // Key frame: the variance tree is filled from the source alone.
    dst_buf = NULL;
    dst_stride = 0;
  }

  // check and set the color sensitivity of sb.
  av1_zero(uv_sad);
  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
               is_zero_motion, uv_sad);

  x->force_zeromv_skip_for_sb = 0;

  VP128x128 *vt;
  AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
  vt->split = td->vt64x64;

  // If the superblock is completely static (zero source sad) and
  // the y_sad (relative to LAST ref) is very small, take the sb_size partition
  // and exit, and force zeromv_last skip mode for nonrd_pickmode.
  // Only do this on the base segment (so the QP-boosted segment, if applied,
  // can still continue cleaning/ramping up the quality).
  // Condition on color uv_sad is also added.
  if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
    // Exit here, if zero mv skip flag is set at SB level.
    // |vt| has been freed by set_force_zeromv_skip_for_sb() on this path.
    // NOTE(review): this early return skips end_timing() under
    // CONFIG_COLLECT_COMPONENT_TIMING — confirm this is intended.
    if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
                                     y_sad, bsize))
      return 0;
  }

  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames)
  // variances for splits.
  fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
                            minvar_16x16, thresholds, src_buf, src_stride,
                            dst_buf, dst_stride, is_key_frame, is_small_sb);

  avg_64x64 = 0;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
    max_var_32x32[blk64_idx] = 0;
    min_var_32x32[blk64_idx] = INT_MAX;
    const int blk64_scale_idx = blk64_idx << 2;
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // For inter frames the 16x16 levels were already handled in
        // fill_variance_tree_leaves() above, so only key frames need the
        // per-16x16 fill/check here.
        if (!is_key_frame) continue;
        VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
          fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[3]) {
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          force_split[split_index] =
              cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
                  ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                  : PART_EVAL_ONLY_SPLIT;
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
      fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if its above
      // (some threshold of) the average variance over the sub-16x16 blocks,
      // then force this block to split. This also forces a split on the upper
      // (64x64) level.
      uint64_t frame_sad_thresh = 20000;
      const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
      if (cpi->svc.number_temporal_layers > 2 &&
          cpi->svc.temporal_layer_id == 0)
        frame_sad_thresh = frame_sad_thresh << 1;
      if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
        get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
        var_32x32 =
            vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
        max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
        min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
        const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
                                            minvar_16x16[blk64_idx][lvl1_idx]);

        if (var_32x32 > thresholds[2] ||
            (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
             var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        } else if (!is_key_frame && is_360p_or_smaller &&
                   ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                     x->content_state_sb.source_sad_nonrd > kLowSad &&
                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] >
                         (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
    if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
      fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
      get_variance(&vt->split[blk64_idx].part_variances.none);
      var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of sub-blocks, or the max
      // variance of a sub-block, is above some threshold then force this
      // block to split. Only checking this for noise level >= medium, if
      // encoder is in SVC or if we already forced large blocks.
      const int max_min_var_32x32_diff =
          max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
      const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
      const bool check_noise_lvl = noise_level >= kMedium ||
                                   cpi->ppi->use_svc ||
                                   cpi->sf.rt_sf.prefer_large_partition_blocks;
      const int64_t set_threshold = 3 * (thresholds[1] >> 3);

      if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
          check_max_var && check_noise_lvl) {
        force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
        force_split[0] = PART_EVAL_ONLY_SPLIT;
      }
      avg_64x64 += var_64x64;
    }
    // For 64x64 superblocks there is no 128x128 level, so the root is always
    // split.
    if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (force_split[0] == PART_EVAL_ALL) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    const int set_avg_64x64 = (9 * avg_64x64) >> 5;
    if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
      force_split[0] = PART_EVAL_ONLY_SPLIT;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
      const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
      const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
      const int blk64_scale_idx = blk64_idx << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
                              mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
                              BLOCK_16X16, force_split[1 + blk64_idx]))
        continue;
      for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
        const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
        const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
        const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
        if (set_vt_partitioning(
                cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
                BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
                force_split[5 + blk64_scale_idx + lvl1_idx]))
          continue;
        for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
          const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
          const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          VP16x16 *vtemp =
              &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
          if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
                                  mi_row + y64_idx + y32_idx + y16_idx,
                                  mi_col + x64_idx + x32_idx + x16_idx,
                                  thresholds[3], BLOCK_8X8,
                                  force_split[split_index]))
            continue;
          // No level accepted a larger partition: emit 8x8 blocks.
          for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
            const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
            const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
            set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                           (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                           BLOCK_8X8);
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row, is_small_sb);
  }

  aom_free(vt);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, choose_var_based_partitioning_time);
#endif
  return 0;
}
1915