• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <math.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 
17 #include "config/aom_config.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "config/av1_rtcd.h"
20 
21 #include "aom_dsp/aom_dsp_common.h"
22 #include "aom_dsp/binary_codes_writer.h"
23 #include "aom_ports/mem.h"
24 #include "aom_ports/aom_timer.h"
25 
26 #include "av1/common/reconinter.h"
27 #include "av1/common/blockd.h"
28 
29 #include "av1/encoder/encodeframe.h"
30 #include "av1/encoder/var_based_part.h"
31 #include "av1/encoder/reconinter_enc.h"
32 
33 extern const uint8_t AV1_VAR_OFFS[];
34 
// Possible values for the force_split variable while evaluating variance based
// partitioning. set_vt_partitioning() receives one of these as its
// force_split argument.
enum {
  // Evaluate all partition types
  PART_EVAL_ALL = 0,
  // Force PARTITION_SPLIT: set_vt_partitioning() returns 0 without selecting
  // a block size at the current level.
  PART_EVAL_ONLY_SPLIT = 1,
  // Force PARTITION_NONE: set_vt_partitioning() selects the current block
  // size without looking at variances, provided the block fits in the tile.
  PART_EVAL_ONLY_NONE = 2
} UENUM1BYTE(PART_EVAL_STATUS);
45 
// Size-independent view of one level of the variance tree, filled in by
// tree_to_node().
typedef struct {
  // none/horz/vert variance records of the block itself.
  VPVariance *part_variances;
  // Variance record of each of the block's four sub-blocks.
  VPartVar *split[4];
} variance_node;
50 
tree_to_node(void * data,BLOCK_SIZE bsize,variance_node * node)51 static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize,
52                                     variance_node *node) {
53   int i;
54   node->part_variances = NULL;
55   switch (bsize) {
56     case BLOCK_128X128: {
57       VP128x128 *vt = (VP128x128 *)data;
58       node->part_variances = &vt->part_variances;
59       for (i = 0; i < 4; i++)
60         node->split[i] = &vt->split[i].part_variances.none;
61       break;
62     }
63     case BLOCK_64X64: {
64       VP64x64 *vt = (VP64x64 *)data;
65       node->part_variances = &vt->part_variances;
66       for (i = 0; i < 4; i++)
67         node->split[i] = &vt->split[i].part_variances.none;
68       break;
69     }
70     case BLOCK_32X32: {
71       VP32x32 *vt = (VP32x32 *)data;
72       node->part_variances = &vt->part_variances;
73       for (i = 0; i < 4; i++)
74         node->split[i] = &vt->split[i].part_variances.none;
75       break;
76     }
77     case BLOCK_16X16: {
78       VP16x16 *vt = (VP16x16 *)data;
79       node->part_variances = &vt->part_variances;
80       for (i = 0; i < 4; i++)
81         node->split[i] = &vt->split[i].part_variances.none;
82       break;
83     }
84     case BLOCK_8X8: {
85       VP8x8 *vt = (VP8x8 *)data;
86       node->part_variances = &vt->part_variances;
87       for (i = 0; i < 4; i++)
88         node->split[i] = &vt->split[i].part_variances.none;
89       break;
90     }
91     default: {
92       VP4x4 *vt = (VP4x4 *)data;
93       assert(bsize == BLOCK_4X4);
94       node->part_variances = &vt->part_variances;
95       for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
96       break;
97     }
98   }
99 }
100 
101 // Set variance values given sum square error, sum error, count.
fill_variance(uint32_t s2,int32_t s,int c,VPartVar * v)102 static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c,
103                                      VPartVar *v) {
104   v->sum_square_error = s2;
105   v->sum_error = s;
106   v->log2_count = c;
107 }
108 
get_variance(VPartVar * v)109 static AOM_INLINE void get_variance(VPartVar *v) {
110   v->variance =
111       (int)(256 * (v->sum_square_error -
112                    (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
113                               v->log2_count)) >>
114             v->log2_count);
115 }
116 
sum_2_variances(const VPartVar * a,const VPartVar * b,VPartVar * r)117 static AOM_INLINE void sum_2_variances(const VPartVar *a, const VPartVar *b,
118                                        VPartVar *r) {
119   assert(a->log2_count == b->log2_count);
120   fill_variance(a->sum_square_error + b->sum_square_error,
121                 a->sum_error + b->sum_error, a->log2_count + 1, r);
122 }
123 
fill_variance_tree(void * data,BLOCK_SIZE bsize)124 static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
125   variance_node node;
126   memset(&node, 0, sizeof(node));
127   tree_to_node(data, bsize, &node);
128   sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
129   sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
130   sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
131   sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
132   sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
133                   &node.part_variances->none);
134 }
135 
set_block_size(AV1_COMP * const cpi,int mi_row,int mi_col,BLOCK_SIZE bsize)136 static AOM_INLINE void set_block_size(AV1_COMP *const cpi, int mi_row,
137                                       int mi_col, BLOCK_SIZE bsize) {
138   if (cpi->common.mi_params.mi_cols > mi_col &&
139       cpi->common.mi_params.mi_rows > mi_row) {
140     CommonModeInfoParams *mi_params = &cpi->common.mi_params;
141     const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
142     const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col);
143     MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] =
144         &mi_params->mi_alloc[mi_alloc_idx];
145     mi->bsize = bsize;
146   }
147 }
148 
// Decides whether the square block |bsize| at (mi_row, mi_col) can be coded
// without a four-way split, based on the variance tree level at |data|.
//
// Returns 1 when a partition (none, vertical or horizontal) was selected and
// written through set_block_size(), and 0 when the caller should split.
//
// |force_split| short-circuits the decision: PART_EVAL_ONLY_NONE selects
// |bsize| whenever the block fits in the tile, PART_EVAL_ONLY_SPLIT always
// returns 0. Blocks smaller than |bsize_min| are never selected.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               PART_EVAL_STATUS force_split) {
  AV1_COMMON *const cm = &cpi->common;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];

  // Extents (in mi units) that have to fit inside the tile for the whole
  // block and for the first half of a vertical / horizontal partition.
  int full_width = block_width;
  int full_height = block_height;
  int vert_half_width = block_width >> 1;
  int horz_half_height = block_height >> 1;
  // On the right and bottom boundary we only need to check
  // if half the bsize fits, because boundary is extended
  // up to 64. So do this check only for sb_size = 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      full_width = (block_width >> 1) + 1;
      vert_half_width = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      full_height = (block_height >> 1) + 1;
      horz_half_height = (block_height >> 2) + 1;
    }
  }
  const int cols_fit = mi_col + full_width <= tile->mi_col_end;
  const int rows_fit = mi_row + full_height <= tile->mi_row_end;
  const int block_fits = cols_fit && rows_fit;

  assert(block_height == block_width);
  variance_node vt;
  tree_to_node(data, bsize, &vt);
  VPVariance *const pv = vt.part_variances;

  if (block_fits && force_split == PART_EVAL_ONLY_NONE) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;

  // Nothing is selected below the minimum block size.
  if (bsize < bsize_min) return 0;

  // On inter frames the variance was already computed to set the
  // force_split; on intra-only frames it is computed here.
  const int is_intra_only = frame_is_intra_only(cm);
  if (is_intra_only) get_variance(&pv->none);

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    if (block_fits && pv->none.variance < threshold) {
      set_block_size(cpi, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  }

  // From here on bsize > bsize_min.
  // For key frame: take split for bsize above 32X32 or very high variance.
  if (is_intra_only &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4))) {
    return 0;
  }

  // If variance is low, take the bsize (no split).
  if (block_fits && pv->none.variance < threshold) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    return 1;
  }

  // Check vertical split: both halves must be below the threshold and the
  // resulting chroma block size must be valid.
  if (rows_fit && mi_col + vert_half_width <= tile->mi_col_end) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold &&
        get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y) < BLOCK_INVALID) {
      set_block_size(cpi, mi_row, mi_col, subsize);
      set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
      return 1;
    }
  }

  // Check horizontal split under the same conditions.
  if (cols_fit && mi_row + horz_half_height <= tile->mi_row_end) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold &&
        get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y) < BLOCK_INVALID) {
      set_block_size(cpi, mi_row, mi_col, subsize);
      set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
      return 1;
    }
  }
  return 0;
}
250 
// Returns 1 when the top-left corner of each of the four 8x8 sub-blocks of
// the 16x16 block at (x16_idx, y16_idx) lies inside a pixels_wide x
// pixels_high area, and 0 otherwise. The corners are at offsets 0 and 8 in
// each direction, so only the offset-8 comparisons can be the binding ones.
static AOM_INLINE int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
                                      int pixels_high) {
  const int right_col_inside = (x16_idx + 8) < pixels_wide;
  const int bottom_row_inside = (y16_idx + 8) < pixels_high;
  return right_col_inside && bottom_row_inside;
}
260 
#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth variant: for each of the four 8x8 sub-blocks of the 16x16
// block at (x16_idx, y16_idx), stores the difference between the 8x8 average
// of the source |s| and of the reference |d| (or the constant 128 on key
// frames) and its square in |vst|. Sub-blocks whose top-left corner is outside
// the pixels_wide x pixels_high area get zeros.
// TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd
static AOM_INLINE void fill_variance_8x8avg_highbd(
    const uint8_t *s, int sp, const uint8_t *d, int dp, int x16_idx,
    int y16_idx, VP16x16 *vst, int pixels_wide, int pixels_high,
    int is_key_frame) {
  for (int blk = 0; blk < 4; ++blk) {
    const int col = x16_idx + ((blk & 1) << 3);
    const int row = y16_idx + ((blk >> 1) << 3);
    int avg_diff = 0;
    unsigned int avg_diff_sq = 0;
    if (col < pixels_wide && row < pixels_high) {
      const int src_avg = aom_highbd_avg_8x8(s + row * sp + col, sp);
      int ref_avg = 128;
      if (!is_key_frame) {
        ref_avg = aom_highbd_avg_8x8(d + row * dp + col, dp);
      }
      avg_diff = src_avg - ref_avg;
      avg_diff_sq = avg_diff * avg_diff;
    }
    fill_variance(avg_diff_sq, avg_diff, 0,
                  &vst->split[blk].part_variances.none);
  }
}
#endif
286 
fill_variance_8x8avg_lowbd(const uint8_t * s,int sp,const uint8_t * d,int dp,int x16_idx,int y16_idx,VP16x16 * vst,int pixels_wide,int pixels_high,int is_key_frame)287 static AOM_INLINE void fill_variance_8x8avg_lowbd(const uint8_t *s, int sp,
288                                                   const uint8_t *d, int dp,
289                                                   int x16_idx, int y16_idx,
290                                                   VP16x16 *vst, int pixels_wide,
291                                                   int pixels_high,
292                                                   int is_key_frame) {
293   unsigned int sse[4] = { 0 };
294   int sum[4] = { 0 };
295   int d_avg[4] = { 128, 128, 128, 128 };
296   int s_avg[4];
297 
298   if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) {
299     aom_avg_8x8_quad(s, sp, x16_idx, y16_idx, s_avg);
300     if (!is_key_frame) aom_avg_8x8_quad(d, dp, x16_idx, y16_idx, d_avg);
301     for (int k = 0; k < 4; k++) {
302       sum[k] = s_avg[k] - d_avg[k];
303       sse[k] = sum[k] * sum[k];
304     }
305   } else {
306     for (int k = 0; k < 4; k++) {
307       const int x8_idx = x16_idx + ((k & 1) << 3);
308       const int y8_idx = y16_idx + ((k >> 1) << 3);
309       if (x8_idx < pixels_wide && y8_idx < pixels_high) {
310         s_avg[k] = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
311         if (!is_key_frame) d_avg[k] = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
312         sum[k] = s_avg[k] - d_avg[k];
313         sse[k] = sum[k] * sum[k];
314       }
315     }
316   }
317 
318   for (int k = 0; k < 4; k++) {
319     fill_variance(sse[k], sum[k], 0, &vst->split[k].part_variances.none);
320   }
321 }
322 
323 // Obtain parameters required to calculate variance (such as sum, sse, etc,.)
324 // at 8x8 sub-block level for a given 16x16 block.
fill_variance_8x8avg(const uint8_t * s,int sp,const uint8_t * d,int dp,int x16_idx,int y16_idx,VP16x16 * vst,int highbd_flag,int pixels_wide,int pixels_high,int is_key_frame)325 static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
326                                             const uint8_t *d, int dp,
327                                             int x16_idx, int y16_idx,
328                                             VP16x16 *vst, int highbd_flag,
329                                             int pixels_wide, int pixels_high,
330                                             int is_key_frame) {
331 #if CONFIG_AV1_HIGHBITDEPTH
332   if (highbd_flag) {
333     fill_variance_8x8avg_highbd(s, sp, d, dp, x16_idx, y16_idx, vst,
334                                 pixels_wide, pixels_high, is_key_frame);
335     return;
336   }
337 #else
338   (void)highbd_flag;
339 #endif  // CONFIG_AV1_HIGHBITDEPTH
340   fill_variance_8x8avg_lowbd(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
341                              pixels_high, is_key_frame);
342 }
343 
// For the 16x16 block at (x16_idx, y16_idx), obtains the min/max pair reported
// by aom_minmax_8x8() (or its high bit-depth counterpart) for every 8x8
// sub-block whose top-left corner is inside the pixels_wide x pixels_high
// area, and returns the largest (max - min) minus the smallest (max - min)
// over those sub-blocks.
// NOTE(review): the running minimum starts at 255, which looks like an 8-bit
// assumption; confirm it is adequate for high bit-depth input and for the
// case where no sub-block is inside the area.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int widest_range = 0;
  int narrowest_range = 255;
  // Loop over the 4 8x8 subblocks.
  for (int blk = 0; blk < 4; ++blk) {
    const int col = x16_idx + ((blk & 1) << 3);
    const int row = y16_idx + ((blk >> 1) << 3);
    if (col >= pixels_wide || row >= pixels_high) continue;

    const uint8_t *const src = s + row * sp + col;
    const uint8_t *const ref = d + row * dp + col;
    int lo = 0;
    int hi = 0;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src, sp, ref, dp, &lo, &hi);
    } else {
      aom_minmax_8x8(src, sp, ref, dp, &lo, &hi);
    }
#else
    aom_minmax_8x8(src, sp, ref, dp, &lo, &hi);
#endif
    const int range = hi - lo;
    if (range > widest_range) widest_range = range;
    if (range < narrowest_range) narrowest_range = range;
  }
  return widest_range - narrowest_range;
}
378 
fill_variance_4x4avg(const uint8_t * s,int sp,const uint8_t * d,int dp,int x8_idx,int y8_idx,VP8x8 * vst,int highbd_flag,int pixels_wide,int pixels_high,int is_key_frame,int border_offset_4x4)379 static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
380                                             const uint8_t *d, int dp,
381                                             int x8_idx, int y8_idx, VP8x8 *vst,
382 #if CONFIG_AV1_HIGHBITDEPTH
383                                             int highbd_flag,
384 #endif
385                                             int pixels_wide, int pixels_high,
386                                             int is_key_frame,
387                                             int border_offset_4x4) {
388   int k;
389   for (k = 0; k < 4; k++) {
390     int x4_idx = x8_idx + ((k & 1) << 2);
391     int y4_idx = y8_idx + ((k >> 1) << 2);
392     unsigned int sse = 0;
393     int sum = 0;
394     if (x4_idx < pixels_wide - border_offset_4x4 &&
395         y4_idx < pixels_high - border_offset_4x4) {
396       int s_avg;
397       int d_avg = 128;
398 #if CONFIG_AV1_HIGHBITDEPTH
399       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
400         s_avg = aom_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
401         if (!is_key_frame)
402           d_avg = aom_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
403       } else {
404         s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
405         if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
406       }
407 #else
408       s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
409       if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
410 #endif
411 
412       sum = s_avg - d_avg;
413       sse = sum * sum;
414     }
415     fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
416   }
417 }
418 
// Scales the base partition threshold: by 3/2 when the frame is a
// non-reference frame, and additionally by 5/4 when |speed| is 8 or higher.
// |width| and |height| are currently unused.
// TODO(kyslov) Bring back threshold adjustment based on content state
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int non_reference_frame) {
  (void)width;
  (void)height;
  const int64_t scaled =
      non_reference_frame ? ((3 * threshold_base) >> 1) : threshold_base;
  return (speed >= 8) ? ((5 * scaled) >> 2) : scaled;
}
432 
// Raises thresholds[1..3] depending on where |qindex| lies relative to the
// window [th - win, th + win]:
//   - at or below th - win the thresholds are left unchanged (weight 1),
//   - at or above th + win thresholds[1] and thresholds[2] are doubled and
//     thresholds[3] is shifted left by |fac| (weight 0),
//   - inside the window the result is a linear blend of the two.
//
// The blend weight is computed in floating point. The previous integer
// division truncated the ratio to 0 everywhere except at the upper edge, which
// turned the intended ramp into a step. Handling the window edges with
// inclusive comparisons yields the same weights as before at the edges and
// avoids a division by zero when |win| is 0. Results are converted to int64_t,
// the element type of |thresholds|, rather than to int.
static AOM_INLINE void tune_thresh_based_on_qindex_window(
    int qindex, int th, int win, int fac, int64_t thresholds[]) {
  double weight;

  if (qindex <= th - win) {
    weight = 1.0;
  } else if (qindex >= th + win) {
    weight = 0.0;
  } else {
    // Strictly inside the window, so win > 0 here.
    weight = 1.0 - (double)(qindex - th + win) / (2.0 * win);
  }

  thresholds[1] = (int64_t)((1 - weight) * (double)(thresholds[1] << 1) +
                            weight * (double)thresholds[1]);
  thresholds[2] = (int64_t)((1 - weight) * (double)(thresholds[2] << 1) +
                            weight * (double)thresholds[2]);
  thresholds[3] = (int64_t)((1 - weight) * (double)(thresholds[3] << fac) +
                            weight * (double)thresholds[3]);
}
450 
set_vbp_thresholds(AV1_COMP * cpi,int64_t thresholds[],int q,int content_lowsumdiff,int source_sad_nonrd,int source_sad_rd,int segment_id,uint64_t blk_sad,int lighting_change)451 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
452                                           int q, int content_lowsumdiff,
453                                           int source_sad_nonrd,
454                                           int source_sad_rd, int segment_id,
455                                           uint64_t blk_sad,
456                                           int lighting_change) {
457   AV1_COMMON *const cm = &cpi->common;
458   const int is_key_frame = frame_is_intra_only(cm);
459   const int threshold_multiplier = is_key_frame ? 120 : 1;
460   const int ac_q = av1_ac_quant_QTX(q, 0, cm->seq_params->bit_depth);
461   int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q);
462   const int current_qindex = cm->quant_params.base_qindex;
463   const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;
464 
465   if (is_key_frame) {
466     if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
467       const int shift_steps =
468           threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8);
469       assert(shift_steps >= 0);
470       threshold_base <<= shift_steps;
471     }
472     thresholds[0] = threshold_base;
473     thresholds[1] = threshold_base;
474     if (cm->width * cm->height < 1280 * 720) {
475       thresholds[2] = threshold_base / 3;
476       thresholds[3] = threshold_base >> 1;
477     } else {
478       int shift_val = 2;
479       if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
480         shift_val = 0;
481       }
482 
483       thresholds[2] = threshold_base >> shift_val;
484       thresholds[3] = threshold_base >> shift_val;
485     }
486     thresholds[4] = threshold_base << 2;
487     return;
488   }
489 
490   // Increase partition thresholds for noisy content. Apply it only for
491   // superblocks where sumdiff is low, as we assume the sumdiff of superblock
492   // whose only change is due to noise will be low (i.e, noise will average
493   // out over large block).
494   if (cpi->noise_estimate.enabled && content_lowsumdiff &&
495       (cm->width * cm->height > 640 * 480) &&
496       cm->current_frame.frame_number > 60) {
497     NOISE_LEVEL noise_level =
498         av1_noise_estimate_extract_level(&cpi->noise_estimate);
499     if (noise_level == kHigh)
500       threshold_base = (5 * threshold_base) >> 1;
501     else if (noise_level == kMedium &&
502              !cpi->sf.rt_sf.prefer_large_partition_blocks)
503       threshold_base = (5 * threshold_base) >> 2;
504   }
505   // TODO(kyslov) Enable var based partition adjusment on temporal denoising
506 #if 0  // CONFIG_AV1_TEMPORAL_DENOISING
507   if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
508       cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
509       threshold_base =
510           av1_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
511                                 content_state, cpi->svc.temporal_layer_id);
512   else
513     threshold_base =
514         scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
515                                   cm->height, cpi->ppi->rtc_ref.non_reference_frame);
516 #else
517   // Increase base variance threshold based on content_state/sum_diff level.
518   threshold_base = scale_part_thresh_content(
519       threshold_base, cpi->oxcf.speed, cm->width, cm->height,
520       cpi->ppi->rtc_ref.non_reference_frame);
521 #endif
522   thresholds[0] = threshold_base >> 1;
523   thresholds[1] = threshold_base;
524   thresholds[3] = threshold_base << threshold_left_shift;
525   if (cm->width >= 1280 && cm->height >= 720)
526     thresholds[3] = thresholds[3] << 1;
527   if (cm->width * cm->height <= 352 * 288) {
528     const int qindex_thr[5][2] = {
529       { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
530     };
531     int th_idx = 0;
532     if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
533       th_idx =
534           (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
535     if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
536       th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
537     const int qindex_low_thr = qindex_thr[th_idx][0];
538     const int qindex_high_thr = qindex_thr[th_idx][1];
539     if (current_qindex >= qindex_high_thr) {
540       threshold_base = (5 * threshold_base) >> 1;
541       thresholds[1] = threshold_base >> 3;
542       thresholds[2] = threshold_base << 2;
543       thresholds[3] = threshold_base << 5;
544     } else if (current_qindex < qindex_low_thr) {
545       thresholds[1] = threshold_base >> 3;
546       thresholds[2] = threshold_base >> 1;
547       thresholds[3] = threshold_base << 3;
548     } else {
549       int64_t qi_diff_low = current_qindex - qindex_low_thr;
550       int64_t qi_diff_high = qindex_high_thr - current_qindex;
551       int64_t threshold_diff = qindex_high_thr - qindex_low_thr;
552       int64_t threshold_base_high = (5 * threshold_base) >> 1;
553 
554       threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
555       threshold_base =
556           (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
557           threshold_diff;
558       thresholds[1] = threshold_base >> 3;
559       thresholds[2] = ((qi_diff_low * threshold_base) +
560                        qi_diff_high * (threshold_base >> 1)) /
561                       threshold_diff;
562       thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
563                        qi_diff_high * (threshold_base << 3)) /
564                       threshold_diff;
565     }
566   } else if (cm->width < 1280 && cm->height < 720) {
567     thresholds[2] = (5 * threshold_base) >> 2;
568   } else if (cm->width < 1920 && cm->height < 1080) {
569     thresholds[2] = threshold_base << 1;
570   } else if (cm->width < 2560 && cm->height < 1440) {
571     thresholds[2] = (5 * threshold_base) >> 1;
572   } else {
573     thresholds[2] = (7 * threshold_base) >> 1;
574   }
575   // Tune thresholds less or more aggressively to prefer larger partitions
576   if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
577     double weight;
578     const int win = 20;
579     if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
580       weight = 1.0;
581     else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
582       weight = 0.0;
583     else
584       weight =
585           1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
586     if (cm->width * cm->height > 640 * 480) {
587       for (int i = 0; i < 4; i++) {
588         thresholds[i] <<= 1;
589       }
590     }
591     if (cm->width * cm->height <= 352 * 288) {
592       thresholds[3] = INT64_MAX;
593       if (segment_id == 0) {
594         thresholds[1] <<= 2;
595         thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
596       } else {
597         thresholds[1] <<= 1;
598         thresholds[2] <<= 3;
599       }
600       // Allow for split to 8x8 for superblocks where part of it has
601       // moving boundary. So allow for sb with source_sad above threshold,
602       // and avoid very large source_sad or high source content, to avoid
603       // too many 8x8 within superblock.
604       if (segment_id == 0 && cpi->rc.avg_source_sad < 25000 &&
605           blk_sad > 25000 && blk_sad < 50000 && !lighting_change) {
606         thresholds[2] = (3 * thresholds[2]) >> 2;
607         thresholds[3] = thresholds[2] << 3;
608       }
609       // Condition the increase of partition thresholds on the segment
610       // and the content. Avoid the increase for superblocks which have
611       // high source sad, unless the whole frame has very high motion
612       // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
613       // have high source sad).
614     } else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
615                (source_sad_nonrd != kHighSad ||
616                 cpi->rc.avg_source_sad > 50000)) {
617       thresholds[0] = (3 * thresholds[0]) >> 1;
618       thresholds[3] = INT64_MAX;
619       if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
620         thresholds[1] =
621             (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
622         thresholds[2] =
623             (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
624       }
625     } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
626                (source_sad_nonrd != kHighSad ||
627                 cpi->rc.avg_source_sad > 50000)) {
628       thresholds[1] =
629           (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
630       thresholds[2] =
631           (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
632       thresholds[3] = INT64_MAX;
633     }
634   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
635     thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
636     thresholds[2] =
637         (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
638   } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
639     const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
640     tune_thresh_based_on_qindex_window(current_qindex, QINDEX_LARGE_BLOCK_THR,
641                                        45, fac, thresholds);
642   }
643   if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
644     thresholds[3] = INT64_MAX;
645 }
646 
647 // Set temporal variance low flag for superblock 64x64.
648 // Only first 25 in the array are used in this case.
set_low_temp_var_flag_64x64(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP64x64 * vt,const int64_t thresholds[],int mi_col,int mi_row)649 static AOM_INLINE void set_low_temp_var_flag_64x64(
650     CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
651     MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col,
652     int mi_row) {
653   if (xd->mi[0]->bsize == BLOCK_64X64) {
654     if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
655       part_info->variance_low[0] = 1;
656   } else if (xd->mi[0]->bsize == BLOCK_64X32) {
657     for (int i = 0; i < 2; i++) {
658       if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
659         part_info->variance_low[i + 1] = 1;
660     }
661   } else if (xd->mi[0]->bsize == BLOCK_32X64) {
662     for (int i = 0; i < 2; i++) {
663       if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
664         part_info->variance_low[i + 3] = 1;
665     }
666   } else {
667     static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
668     for (int i = 0; i < 4; i++) {
669       const int idx_str =
670           mi_params->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
671       MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;
672 
673       if (mi_params->mi_cols <= mi_col + idx[i][1] ||
674           mi_params->mi_rows <= mi_row + idx[i][0])
675         continue;
676 
677       if (*this_mi == NULL) continue;
678 
679       if ((*this_mi)->bsize == BLOCK_32X32) {
680         int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
681         if (vt->split[i].part_variances.none.variance < threshold_32x32)
682           part_info->variance_low[i + 5] = 1;
683       } else {
684         // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
685         // inside.
686         if ((*this_mi)->bsize == BLOCK_16X16 ||
687             (*this_mi)->bsize == BLOCK_32X16 ||
688             (*this_mi)->bsize == BLOCK_16X32) {
689           for (int j = 0; j < 4; j++) {
690             if (vt->split[i].split[j].part_variances.none.variance <
691                 (thresholds[2] >> 8))
692               part_info->variance_low[(i << 2) + j + 9] = 1;
693           }
694         }
695       }
696     }
697   }
698 }
699 
set_low_temp_var_flag_128x128(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,const int64_t thresholds[],int mi_col,int mi_row)700 static AOM_INLINE void set_low_temp_var_flag_128x128(
701     CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
702     MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
703     int mi_row) {
704   if (xd->mi[0]->bsize == BLOCK_128X128) {
705     if (vt->part_variances.none.variance < (thresholds[0] >> 1))
706       part_info->variance_low[0] = 1;
707   } else if (xd->mi[0]->bsize == BLOCK_128X64) {
708     for (int i = 0; i < 2; i++) {
709       if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
710         part_info->variance_low[i + 1] = 1;
711     }
712   } else if (xd->mi[0]->bsize == BLOCK_64X128) {
713     for (int i = 0; i < 2; i++) {
714       if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
715         part_info->variance_low[i + 3] = 1;
716     }
717   } else {
718     static const int idx64[4][2] = {
719       { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
720     };
721     static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
722     for (int i = 0; i < 4; i++) {
723       const int idx_str =
724           mi_params->mi_stride * (mi_row + idx64[i][0]) + mi_col + idx64[i][1];
725       MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
726       if (*mi_64 == NULL) continue;
727       if (mi_params->mi_cols <= mi_col + idx64[i][1] ||
728           mi_params->mi_rows <= mi_row + idx64[i][0])
729         continue;
730       const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
731       if ((*mi_64)->bsize == BLOCK_64X64) {
732         if (vt->split[i].part_variances.none.variance < threshold_64x64)
733           part_info->variance_low[5 + i] = 1;
734       } else if ((*mi_64)->bsize == BLOCK_64X32) {
735         for (int j = 0; j < 2; j++)
736           if (vt->split[i].part_variances.horz[j].variance <
737               (threshold_64x64 >> 1))
738             part_info->variance_low[9 + (i << 1) + j] = 1;
739       } else if ((*mi_64)->bsize == BLOCK_32X64) {
740         for (int j = 0; j < 2; j++)
741           if (vt->split[i].part_variances.vert[j].variance <
742               (threshold_64x64 >> 1))
743             part_info->variance_low[17 + (i << 1) + j] = 1;
744       } else {
745         for (int k = 0; k < 4; k++) {
746           const int idx_str1 = mi_params->mi_stride * idx32[k][0] + idx32[k][1];
747           MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
748           if (*mi_32 == NULL) continue;
749 
750           if (mi_params->mi_cols <= mi_col + idx64[i][1] + idx32[k][1] ||
751               mi_params->mi_rows <= mi_row + idx64[i][0] + idx32[k][0])
752             continue;
753           const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
754           if ((*mi_32)->bsize == BLOCK_32X32) {
755             if (vt->split[i].split[k].part_variances.none.variance <
756                 threshold_32x32)
757               part_info->variance_low[25 + (i << 2) + k] = 1;
758           } else {
759             // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
760             // inside.
761             if ((*mi_32)->bsize == BLOCK_16X16 ||
762                 (*mi_32)->bsize == BLOCK_32X16 ||
763                 (*mi_32)->bsize == BLOCK_16X32) {
764               for (int j = 0; j < 4; j++) {
765                 if (vt->split[i]
766                         .split[k]
767                         .split[j]
768                         .part_variances.none.variance < (thresholds[3] >> 8))
769                   part_info->variance_low[41 + (i << 4) + (k << 2) + j] = 1;
770               }
771             }
772           }
773         }
774       }
775     }
776   }
777 }
778 
set_low_temp_var_flag(AV1_COMP * cpi,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,int64_t thresholds[],MV_REFERENCE_FRAME ref_frame_partition,int mi_col,int mi_row)779 static AOM_INLINE void set_low_temp_var_flag(
780     AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
781     VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
782     int mi_col, int mi_row) {
783   AV1_COMMON *const cm = &cpi->common;
784   // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected.
785   // If the temporal variance is small set the flag
786   // variance_low for the block. The variance threshold can be adjusted, the
787   // higher the more aggressive.
788   if (ref_frame_partition == LAST_FRAME) {
789     const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
790     if (is_small_sb)
791       set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
792                                   &(vt->split[0]), thresholds, mi_col, mi_row);
793     else
794       set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
795                                     thresholds, mi_col, mi_row);
796   }
797 }
798 
// variance_low[] slot of each 16x16 block of a 64x64 superblock, indexed as
// [row][column] of the 16x16 block inside the superblock. Slots 9..24 are
// grouped four per 32x32 quadrant, so a row of 16x16 blocks alternates between
// two quadrants.
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 },
  { 11, 12, 15, 16 },
  { 17, 18, 21, 22 },
  { 19, 20, 23, 24 },
};
802 
// Looks up the low-variance flag for a block of size |bsize| located at
// (mi_row, mi_col) when the superblock is 64x64. |variance_low| uses the
// layout: [0] 64x64, [1..2] 64x32, [3..4] 32x64, [5..8] 32x32, [9..24] 16x16.
// Returns the stored flag, or 0 for block sizes that have no entry and for
// 64x32 / 32x64 blocks that do not start on the superblock's left / top edge.
int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
                                             int mi_row, int mi_col,
                                             BLOCK_SIZE bsize) {
  // Offsets of the block inside the superblock, in mode-info units.
  const int row_off = mi_row & 0xF;
  const int col_off = mi_col & 0xF;

  switch (bsize) {
    case BLOCK_64X64: return variance_low[0];
    case BLOCK_64X32:
      if (col_off != 0) return 0;
      // Top half -> [1], bottom half -> [2].
      return variance_low[row_off ? 2 : 1];
    case BLOCK_32X64:
      if (row_off != 0) return 0;
      // Left half -> [3], right half -> [4].
      return variance_low[col_off ? 4 : 3];
    case BLOCK_32X32:
      // Quadrants in raster order: [5] [6] / [7] [8].
      return variance_low[5 + ((row_off != 0) << 1) + (col_off != 0)];
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // Index by the 16x16 block's row and column inside the superblock.
      return variance_low[pos_shift_16x16[row_off >> 2][col_off >> 2]];
    default: break;
  }
  return 0;
}
851 
// Looks up the low-variance flag for a block of size |bsize| located at
// (mi_row, mi_col) inside a 128x128 superblock. |variance_low| uses the
// layout: [0] 128x128, [1..2] 128x64, [3..4] 64x128, [5..8] 64x64,
// [9..16] 64x32, [17..24] 32x64, [25..40] 32x32, [41..104] 16x16.
// Returns the stored flag, or 0 for block sizes that have no entry.
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  // Raster index (0..3) of the enclosing quadrant at each level, built from a
  // single bit of the row (weight 2) and of the column (weight 1):
  //   bit 4 -> 64x64 quadrant of the superblock,
  //   bit 3 -> 32x32 quadrant of that 64x64 block,
  //   bit 2 -> 16x16 quadrant of that 32x32 block.
  const int idx64 = ((mi_row & 0x10) >> 3) + ((mi_col & 0x10) >> 4);
  const int idx32 = ((mi_row & 0x08) >> 2) + ((mi_col & 0x08) >> 3);
  const int idx16 = ((mi_row & 0x04) >> 1) + ((mi_col & 0x04) >> 2);

  // Set the result based on the block size and block offset.
  switch (bsize) {
    case BLOCK_128X128: return variance_low[0];
    case BLOCK_128X64:
      assert((mi_col & 0x1F) == 0);
      return variance_low[1 + ((mi_row & 0x1F) != 0)];
    case BLOCK_64X128:
      assert((mi_row & 0x1F) == 0);
      return variance_low[3 + ((mi_col & 0x1F) != 0)];
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock.
      return variance_low[5 + idx64];
    case BLOCK_64X32: {
      // Two entries per 64x64 quadrant, top half first:
      //   .-------.-------.
      //   | idx 0 | idx 2 |
      //   | idx 1 | idx 3 |
      //   :-------+-------:
      //   | idx 4 | idx 6 |
      //   | idx 5 | idx 7 |
      //   '-------'-------'
      const int lower_half = (mi_row & 0x08) >> 3;
      return variance_low[9 + (idx64 << 1) + lower_half];
    }
    case BLOCK_32X64: {
      // Two entries per 64x64 quadrant, left half first.
      const int right_half = (mi_col & 0x08) >> 3;
      return variance_low[17 + (idx64 << 1) + right_half];
    }
    case BLOCK_32X32: return variance_low[25 + (idx64 << 2) + idx32];
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      return variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
    default: break;
  }
  return 0;
}
924 
// Refreshes the variance-partition thresholds stored in cpi->vbp_info for
// quantizer index |q|. Does nothing unless the partition search type is
// VAR_BASED_PARTITION. All set_vbp_thresholds() arguments after
// |content_lowsumdiff| are passed as 0.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
                                           int content_lowsumdiff) {
  if (cpi->sf.part_sf.partition_search_type != VAR_BASED_PARTITION) return;

  set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0, 0,
                     0, 0, 0);
  // The threshold below is not changed locally.
  cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
}
937 
chroma_check(AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int y_sad,unsigned int y_sad_g,int is_key_frame,int zero_motion,unsigned int * uv_sad)938 static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
939                                     BLOCK_SIZE bsize, unsigned int y_sad,
940                                     unsigned int y_sad_g, int is_key_frame,
941                                     int zero_motion, unsigned int *uv_sad) {
942   int i;
943   MACROBLOCKD *xd = &x->e_mbd;
944   int shift = 3;
945   if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;
946 
947   if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
948       cpi->rc.high_source_sad)
949     shift = 5;
950 
951   MB_MODE_INFO *mi = xd->mi[0];
952   const AV1_COMMON *const cm = &cpi->common;
953   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
954   const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
955   const struct scale_factors *const sf =
956       get_ref_scale_factors_const(cm, LAST_FRAME);
957   struct buf_2d dst;
958   unsigned int uv_sad_g = 0;
959 
960   for (i = 1; i <= 2; ++i) {
961     struct macroblock_plane *p = &x->plane[i];
962     struct macroblockd_plane *pd = &xd->plane[i];
963     const BLOCK_SIZE bs =
964         get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
965 
966     if (bs != BLOCK_INVALID) {
967       // For last:
968       if (zero_motion) {
969         if (mi->ref_frame[0] == LAST_FRAME) {
970           uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(
971               p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
972         } else {
973           uint8_t *src = (i == 1) ? yv12->u_buffer : yv12->v_buffer;
974           setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
975                            yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
976                            xd->mi_col, sf, xd->plane[i].subsampling_x,
977                            xd->plane[i].subsampling_y);
978 
979           uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
980                                                    dst.buf, dst.stride);
981         }
982       } else {
983         uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
984                                                  pd->dst.buf, pd->dst.stride);
985       }
986 
987       // For golden:
988       if (y_sad_g != UINT_MAX) {
989         uint8_t *src = (i == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
990         setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
991                          yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
992                          xd->mi_col, sf, xd->plane[i].subsampling_x,
993                          xd->plane[i].subsampling_y);
994         uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
995                                             dst.stride);
996       }
997     }
998 
999     if (uv_sad[i - 1] > (y_sad >> 1))
1000       x->color_sensitivity_sb[i - 1] = 1;
1001     else if (uv_sad[i - 1] < (y_sad >> shift))
1002       x->color_sensitivity_sb[i - 1] = 0;
1003     // Borderline case: to be refined at coding block level in nonrd_pickmode,
1004     // for coding block size < sb_size.
1005     else
1006       x->color_sensitivity_sb[i - 1] = 2;
1007 
1008     x->color_sensitivity_sb_g[i - 1] = uv_sad_g > y_sad_g / 6;
1009   }
1010 }
1011 
// Fills the 16x16 leaves of the variance tree |vt| for every 64x64 block of
// the superblock and initializes the related per-block state.
//
// Outputs, with m = 64x64 index, i = 32x32 index inside it and j = 16x16 index
// inside that:
//   force_split[1 + m], force_split[5 + 4m + i], force_split[21 + 16m + 4i + j]
//       reset to PART_EVAL_ALL, then set to PART_EVAL_ONLY_SPLIT (together
//       with force_split[0]) when a 16x16 block must be split.
//   avg_16x16[m][i], maxvar_16x16[m][i], minvar_16x16[m][i]
//       sum, maximum and minimum of the 16x16 variances of a 32x32 block
//       (only accumulated on non-intra frames).
//   variance4x4downsample[16m + 4i + j]
//       1 when the 16x16 block was filled from 4x4 averages, else 0.
//
// On non-intra frames each 16x16 node is filled from 8x8 averages and its
// variance is compared with thresholds[3]. On intra-only frames the 8x8 nodes
// of |vt| are filled from 4x4 averages instead and no split is forced here.
// |vt2| is not accessed.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2,
    PART_EVAL_STATUS *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
    int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds,
    uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) {
  AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int is_key_frame = frame_is_intra_only(cm);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // The 4x4-average path below only runs on intra-only frames, where the 8x8
  // nodes always come from |vt|; the separate |vt2| storage is never selected.
  (void)vt2;
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clip the processed area when the superblock extends past the frame edge.
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;

  for (int m = 0; m < num_64x64_blocks; m++) {
    const int x64_idx = ((m & 1) << 6);
    const int y64_idx = ((m >> 1) << 6);
    const int m2 = m << 2;
    const int split_index_64 = m + 1;
    force_split[split_index_64] = PART_EVAL_ALL;

    for (int i = 0; i < 4; i++) {
      const int x32_idx = x64_idx + ((i & 1) << 5);
      const int y32_idx = y64_idx + ((i >> 1) << 5);
      const int i2 = (m2 + i) << 2;
      const int split_index_32 = 5 + m2 + i;
      force_split[split_index_32] = PART_EVAL_ALL;
      avg_16x16[m][i] = 0;
      maxvar_16x16[m][i] = 0;
      minvar_16x16[m][i] = INT_MAX;

      for (int j = 0; j < 4; j++) {
        const int x16_idx = x32_idx + ((j & 1) << 4);
        const int y16_idx = y32_idx + ((j >> 1) << 4);
        const int leaf_index = i2 + j;
        const int split_index = 21 + leaf_index;
        VP16x16 *vst = &vt->split[m].split[i].split[j];
        force_split[split_index] = PART_EVAL_ALL;

        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          variance4x4downsample[leaf_index] = 1;
          for (int k = 0; k < 4; k++) {
            int x8_idx = x16_idx + ((k & 1) << 3);
            int y8_idx = y16_idx + ((k >> 1) << 3);
            VP8x8 *vst2 = &vst->split[k];
            fill_variance_4x4avg(
                src, src_stride, dst, dst_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                xd->cur_buf->flags,
#endif
                pixels_wide, pixels_high, is_key_frame, border_offset_4x4);
          }
          continue;
        }

        variance4x4downsample[leaf_index] = 0;
        fill_variance_8x8avg(src, src_stride, dst, dst_stride, x16_idx, y16_idx,
                             vst, is_cur_buf_hbd(xd), pixels_wide, pixels_high,
                             is_key_frame);
        fill_variance_tree(vst, BLOCK_16X16);
        get_variance(&vst->part_variances.none);

        // Track sum / min / max of the 16x16 variances of this 32x32 block.
        avg_16x16[m][i] += vst->part_variances.none.variance;
        if (vst->part_variances.none.variance < minvar_16x16[m][i])
          minvar_16x16[m][i] = vst->part_variances.none.variance;
        if (vst->part_variances.none.variance > maxvar_16x16[m][i])
          maxvar_16x16[m][i] = vst->part_variances.none.variance;

        int must_split = 0;
        if (vst->part_variances.none.variance > thresholds[3]) {
          // 16X16 variance is above threshold for split, so force split to
          // 8x8 for this 16x16 block (this also forces splits for upper
          // levels).
          must_split = 1;
        } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                   compute_minmax_variance &&
                   vst->part_variances.none.variance > thresholds[2]) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above
          // threshold, force split to 8x8 block for this 16x16 block.
          int minmax = compute_minmax_8x8(src, src_stride, dst, dst_stride,
                                          x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                          xd->cur_buf->flags,
#endif
                                          pixels_wide, pixels_high);
          int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
          must_split = (minmax > thresh_minmax);
        }

        if (must_split) {
          // A forced split propagates to every enclosing level.
          force_split[split_index] = PART_EVAL_ONLY_SPLIT;
          force_split[split_index_32] = PART_EVAL_ONLY_SPLIT;
          force_split[split_index_64] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
  }
}
1134 
// Chooses the reference frame used for variance based partitioning of the
// superblock at (mi_row, mi_col) and sets up the prediction planes for it.
//
// Luma SADs of the superblock are computed against GOLDEN, ALTREF and LAST
// (each only when enabled) and stored in |y_sad_g|, |y_sad_alt| and
// |y_sad_last|; values that are not computed are left as passed in by the
// caller. GOLDEN or ALTREF is chosen only if its SAD is below 0.9 times the
// LAST SAD and strictly below the other candidate's SAD; if the two candidates
// have equal SAD neither test passes and LAST is kept. On return |y_sad| holds
// the SAD of the chosen reference and |ref_frame_partition| names it.
// If the resulting motion vector is non-zero the inter predictor is built.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition, int mi_row,
                         int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  const BLOCK_SIZE bsize =
      (cm->seq_params->sb_size == BLOCK_64X64) ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;

  // Check if LAST is a reference. With spatial layers LAST is always used,
  // because reference scaling (golden or altref being lower resolution) is
  // not handled or checked here.
  const int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                           cpi->svc.number_spatial_layers > 1;
  const int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  const int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                          cpi->sf.rt_sf.use_nonrd_altref_frame;

  // For 1 spatial layer GOLDEN and ALTREF are additional temporal references.
  // They are only considered when the source SAD is not zero or LAST is
  // unavailable.
  const int check_other_refs =
      cpi->svc.number_spatial_layers == 1 &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref);

  if (check_other_refs && use_golden_ref) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    if (yv12_g && yv12_g != yv12) {
      av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    }
  }

  if (check_other_refs && use_alt_ref &&
      (cpi->ref_frame_flags & AOM_ALT_FLAG)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    if (yv12_alt && yv12_alt != yv12) {
      av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
                           get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    }
  }

  if (use_last_ref) {
    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    // Motion estimation is only attempted for superblocks fully inside the
    // frame.
    if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition &&
        xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
      const MV dummy_mv = { 0, 0 };
      *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
                                             mi_row, mi_col, &dummy_mv);
    }
    // Fall back to the plain SAD when no estimate was produced.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    }
    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  MV_REFERENCE_FRAME chosen_ref = LAST_FRAME;
  if (*y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt) {
    chosen_ref = GOLDEN_FRAME;
  } else if (*y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g) {
    chosen_ref = ALTREF_FRAME;
  }

  if (chosen_ref == LAST_FRAME) {
    *ref_frame_partition = LAST_FRAME;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
  } else {
    const int picked_golden = (chosen_ref == GOLDEN_FRAME);
    av1_setup_pre_planes(xd, 0, picked_golden ? yv12_g : yv12_alt, mi_row,
                         mi_col, get_ref_scale_factors(cm, chosen_ref),
                         num_planes);
    mi->ref_frame[0] = chosen_ref;
    mi->mv[0].as_int = 0;
    *y_sad = picked_golden ? *y_sad_g : *y_sad_alt;
    *ref_frame_partition = chosen_ref;
    x->nonrd_prune_ref_frame_search = 0;
  }

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
1243 
1244 // Decides whether to split or merge a 16x16 partition block in variance based
1245 // partitioning based on the 8x8 sub-block variances.
get_part_eval_based_on_sub_blk_var(VP16x16 * var_16x16_info,int64_t threshold16)1246 static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
1247     VP16x16 *var_16x16_info, int64_t threshold16) {
1248   int max_8x8_var = 0, min_8x8_var = INT_MAX;
1249   for (int k = 0; k < 4; k++) {
1250     get_variance(&var_16x16_info->split[k].part_variances.none);
1251     int this_8x8_var = var_16x16_info->split[k].part_variances.none.variance;
1252     max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
1253     min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
1254   }
1255   // If the difference between maximum and minimum sub-block variances is high,
1256   // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
1257   // only PARTITION_NONE. The shift factor for threshold16 has been derived
1258   // empirically.
1259   return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
1260              ? PART_EVAL_ONLY_SPLIT
1261              : PART_EVAL_ONLY_NONE;
1262 }
1263 
is_set_force_zeromv_skip_based_on_src_sad(int set_zeromv_skip_based_on_source_sad,SOURCE_SAD source_sad_nonrd)1264 static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad(
1265     int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
1266   if (set_zeromv_skip_based_on_source_sad == 0) return false;
1267 
1268   if (set_zeromv_skip_based_on_source_sad >= 2)
1269     return source_sad_nonrd <= kVeryLowSad;
1270   else if (set_zeromv_skip_based_on_source_sad >= 1)
1271     return source_sad_nonrd == kZeroSad;
1272 
1273   return false;
1274 }
1275 
av1_choose_var_based_partitioning(AV1_COMP * cpi,const TileInfo * const tile,ThreadData * td,MACROBLOCK * x,int mi_row,int mi_col)1276 int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
1277                                       ThreadData *td, MACROBLOCK *x, int mi_row,
1278                                       int mi_col) {
1279 #if CONFIG_COLLECT_COMPONENT_TIMING
1280   start_timing(cpi, choose_var_based_partitioning_time);
1281 #endif
1282   AV1_COMMON *const cm = &cpi->common;
1283   MACROBLOCKD *xd = &x->e_mbd;
1284   const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
1285 
1286   int i, j, k, m;
1287   VP128x128 *vt;
1288   VP16x16 *vt2 = NULL;
1289   PART_EVAL_STATUS force_split[85];
1290   int avg_64x64;
1291   int max_var_32x32[4];
1292   int min_var_32x32[4];
1293   int var_32x32;
1294   int var_64x64;
1295   int min_var_64x64 = INT_MAX;
1296   int max_var_64x64 = 0;
1297   int avg_16x16[4][4];
1298   int maxvar_16x16[4][4];
1299   int minvar_16x16[4][4];
1300   int64_t threshold_4x4avg;
1301   uint8_t *s;
1302   const uint8_t *d;
1303   int sp;
1304   int dp;
1305   unsigned int uv_sad[2];
1306   NOISE_LEVEL noise_level = kLow;
1307   int zero_motion = 1;
1308 
1309   int is_key_frame =
1310       (frame_is_intra_only(cm) ||
1311        (cpi->ppi->use_svc &&
1312         cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
1313 
1314   assert(cm->seq_params->sb_size == BLOCK_64X64 ||
1315          cm->seq_params->sb_size == BLOCK_128X128);
1316   const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
1317   const int num_64x64_blocks = is_small_sb ? 1 : 4;
1318 
1319   unsigned int y_sad = UINT_MAX;
1320   unsigned int y_sad_g = UINT_MAX;
1321   unsigned int y_sad_alt = UINT_MAX;
1322   unsigned int y_sad_last = UINT_MAX;
1323   BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
1324 
1325   // Ref frame used in partitioning.
1326   MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
1327 
1328   CHECK_MEM_ERROR(cm, vt, aom_malloc(sizeof(*vt)));
1329 
1330   vt->split = td->vt64x64;
1331 
1332   int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
1333                             vbp_thresholds[2], vbp_thresholds[3],
1334                             vbp_thresholds[4] };
1335 
1336   const int low_res = (cm->width <= 352 && cm->height <= 288);
1337   int variance4x4downsample[64];
1338   const int segment_id = xd->mi[0]->segment_id;
1339   uint64_t blk_sad = 0;
1340   if (cpi->src_sad_blk_64x64 != NULL && !cpi->ppi->use_svc) {
1341     const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
1342                                   ? (cm->seq_params->mib_size >> 1)
1343                                   : cm->seq_params->mib_size;
1344     const int sb_cols =
1345         (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
1346     const int sbi_col = mi_col / sb_size_by_mb;
1347     const int sbi_row = mi_row / sb_size_by_mb;
1348     blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
1349   }
1350 
1351   if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
1352       cyclic_refresh_segment_id_boosted(segment_id)) {
1353     const int q =
1354         av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
1355     set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
1356                        x->content_state_sb.source_sad_nonrd,
1357                        x->content_state_sb.source_sad_rd, 1, blk_sad,
1358                        x->content_state_sb.lighting_change);
1359   } else {
1360     set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
1361                        x->content_state_sb.low_sumdiff,
1362                        x->content_state_sb.source_sad_nonrd,
1363                        x->content_state_sb.source_sad_rd, 0, blk_sad,
1364                        x->content_state_sb.lighting_change);
1365   }
1366 
1367   // For non keyframes, disable 4x4 average for low resolution when speed = 8
1368   threshold_4x4avg = INT64_MAX;
1369 
1370   s = x->plane[0].src.buf;
1371   sp = x->plane[0].src.stride;
1372 
1373   // Index for force_split: 0 for 128x128, 1-4 for 64x64 blocks,
1374   // 5-20 for the 32x32 blocks, 21-84 for the 16x16 blocks.
1375   force_split[0] = PART_EVAL_ALL;
1376   memset(x->part_search_info.variance_low, 0,
1377          sizeof(x->part_search_info.variance_low));
1378 
1379   // Check if LAST frame is NULL or if the resolution of LAST is
1380   // different than the current frame resolution, and if so, treat this frame
1381   // as a key frame, for the purpose of the superblock partitioning.
1382   // LAST == NULL can happen in cases where enhancement spatial layers are
1383   // enabled dynamically and the only reference is the spatial(GOLDEN).
1384   // TODO(marpan): Check use of scaled references for the different resoln.
1385   if (!frame_is_intra_only(cm)) {
1386     const YV12_BUFFER_CONFIG *const ref =
1387         get_ref_frame_yv12_buf(cm, LAST_FRAME);
1388     if (ref == NULL || ref->y_crop_height != cm->height ||
1389         ref->y_crop_width != cm->width) {
1390       is_key_frame = 1;
1391     }
1392   }
1393 
1394   if (!is_key_frame) {
1395     setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
1396                  &ref_frame_partition, mi_row, mi_col);
1397 
1398     MB_MODE_INFO *mi = xd->mi[0];
1399     // Use reference SB directly for zero mv.
1400     if (mi->mv[0].as_int != 0) {
1401       d = xd->plane[0].dst.buf;
1402       dp = xd->plane[0].dst.stride;
1403       zero_motion = 0;
1404     } else {
1405       d = xd->plane[0].pre[0].buf;
1406       dp = xd->plane[0].pre[0].stride;
1407     }
1408   } else {
1409     d = AV1_VAR_OFFS;
1410     dp = 0;
1411   }
1412 
1413   uv_sad[0] = 0;
1414   uv_sad[1] = 0;
1415   chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, is_key_frame, zero_motion,
1416                uv_sad);
1417 
1418   x->force_zeromv_skip_for_sb = 0;
1419   const bool is_set_force_zeromv_skip =
1420       is_set_force_zeromv_skip_based_on_src_sad(
1421           cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
1422           x->content_state_sb.source_sad_nonrd);
1423 
1424   // If the superblock is completely static (zero source sad) and
1425   // the y_sad (relative to LAST ref) is very small, take the sb_size partition
1426   // and exit, and force zeromv_last skip mode for nonrd_pickmode.
1427   // Only do this on the base segment (so the QP-boosted segment, if applied,
1428   // can still continue cleaning/ramping up the quality).
1429   // Condition on color uv_sad is also added.
1430   if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
1431       cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
1432       is_set_force_zeromv_skip && ref_frame_partition == LAST_FRAME &&
1433       xd->mi[0]->mv[0].as_int == 0) {
1434     const int block_width = mi_size_wide[cm->seq_params->sb_size];
1435     const int block_height = mi_size_high[cm->seq_params->sb_size];
1436     const unsigned int thresh_exit_part_y =
1437         cpi->zeromv_skip_thresh_exit_part[bsize];
1438     const unsigned int thresh_exit_part_uv =
1439         CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
1440     if (mi_col + block_width <= tile->mi_col_end &&
1441         mi_row + block_height <= tile->mi_row_end &&
1442         y_sad < thresh_exit_part_y && uv_sad[0] < thresh_exit_part_uv &&
1443         uv_sad[1] < thresh_exit_part_uv) {
1444       set_block_size(cpi, mi_row, mi_col, bsize);
1445       x->force_zeromv_skip_for_sb = 1;
1446       if (vt2) aom_free(vt2);
1447       if (vt) aom_free(vt);
1448       return 0;
1449     } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
1450                cpi->sf.rt_sf.part_early_exit_zeromv >= 2) {
1451       x->force_zeromv_skip_for_sb = 2;
1452     }
1453   }
1454 
1455   if (cpi->noise_estimate.enabled)
1456     noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
1457 
1458   if (low_res && threshold_4x4avg < INT64_MAX)
1459     CHECK_MEM_ERROR(cm, vt2, aom_malloc(sizeof(*vt2)));
1460   // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
1461   // for splits.
1462   fill_variance_tree_leaves(cpi, x, vt, vt2, force_split, avg_16x16,
1463                             maxvar_16x16, minvar_16x16, variance4x4downsample,
1464                             thresholds, s, sp, d, dp);
1465 
1466   avg_64x64 = 0;
1467   for (m = 0; m < num_64x64_blocks; ++m) {
1468     max_var_32x32[m] = 0;
1469     min_var_32x32[m] = INT_MAX;
1470     const int m2 = m << 2;
1471     for (i = 0; i < 4; i++) {
1472       const int i2 = (m2 + i) << 2;
1473       for (j = 0; j < 4; j++) {
1474         const int split_index = 21 + i2 + j;
1475         if (variance4x4downsample[i2 + j] == 1) {
1476           VP16x16 *vtemp =
1477               (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
1478           for (k = 0; k < 4; k++)
1479             fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
1480           fill_variance_tree(vtemp, BLOCK_16X16);
1481           // If variance of this 16x16 block is above the threshold, force block
1482           // to split. This also forces a split on the upper levels.
1483           get_variance(&vtemp->part_variances.none);
1484           if (vtemp->part_variances.none.variance > thresholds[3]) {
1485             force_split[split_index] =
1486                 cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
1487                     ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
1488                     : PART_EVAL_ONLY_SPLIT;
1489             force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
1490             force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
1491             force_split[0] = PART_EVAL_ONLY_SPLIT;
1492           }
1493         }
1494       }
1495       fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
1496       // If variance of this 32x32 block is above the threshold, or if its above
1497       // (some threshold of) the average variance over the sub-16x16 blocks,
1498       // then force this block to split. This also forces a split on the upper
1499       // (64x64) level.
1500       uint64_t frame_sad_thresh = 20000;
1501       if (cpi->svc.number_temporal_layers > 2 &&
1502           cpi->svc.temporal_layer_id == 0)
1503         frame_sad_thresh = frame_sad_thresh << 1;
1504       if (force_split[5 + m2 + i] == PART_EVAL_ALL) {
1505         get_variance(&vt->split[m].split[i].part_variances.none);
1506         var_32x32 = vt->split[m].split[i].part_variances.none.variance;
1507         max_var_32x32[m] = AOMMAX(var_32x32, max_var_32x32[m]);
1508         min_var_32x32[m] = AOMMIN(var_32x32, min_var_32x32[m]);
1509         if (vt->split[m].split[i].part_variances.none.variance >
1510                 thresholds[2] ||
1511             (!is_key_frame &&
1512              vt->split[m].split[i].part_variances.none.variance >
1513                  (thresholds[2] >> 1) &&
1514              vt->split[m].split[i].part_variances.none.variance >
1515                  (avg_16x16[m][i] >> 1))) {
1516           force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
1517           force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
1518           force_split[0] = PART_EVAL_ONLY_SPLIT;
1519         } else if (!is_key_frame && (cm->width * cm->height <= 640 * 360) &&
1520                    (((maxvar_16x16[m][i] - minvar_16x16[m][i]) >
1521                          (thresholds[2] >> 1) &&
1522                      maxvar_16x16[m][i] > thresholds[2]) ||
1523                     (cpi->sf.rt_sf.prefer_large_partition_blocks &&
1524                      x->content_state_sb.source_sad_nonrd > kLowSad &&
1525                      cpi->rc.frame_source_sad < frame_sad_thresh &&
1526                      maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
1527                      maxvar_16x16[m][i] > (minvar_16x16[m][i] << 2)))) {
1528           force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
1529           force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
1530           force_split[0] = PART_EVAL_ONLY_SPLIT;
1531         }
1532       }
1533     }
1534     if (force_split[1 + m] == PART_EVAL_ALL) {
1535       fill_variance_tree(&vt->split[m], BLOCK_64X64);
1536       get_variance(&vt->split[m].part_variances.none);
1537       var_64x64 = vt->split[m].part_variances.none.variance;
1538       max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
1539       min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
1540       // If both the max-min difference of the sub-block variances and the
1541       // max variance of a sub-block are above their thresholds, then force
1542       // this block to split. Only checking this for noise level >= medium, if
1543       // encoder is in SVC or if we already forced large blocks.
1544 
1545       if (!is_key_frame &&
1546           (max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
1547           max_var_32x32[m] > thresholds[1] >> 1 &&
1548           (noise_level >= kMedium || cpi->ppi->use_svc ||
1549            cpi->sf.rt_sf.prefer_large_partition_blocks)) {
1550         force_split[1 + m] = PART_EVAL_ONLY_SPLIT;
1551         force_split[0] = PART_EVAL_ONLY_SPLIT;
1552       }
1553       avg_64x64 += var_64x64;
1554     }
1555     if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
1556   }
1557 
1558   if (force_split[0] == PART_EVAL_ALL) {
1559     fill_variance_tree(vt, BLOCK_128X128);
1560     get_variance(&vt->part_variances.none);
1561     if (!is_key_frame &&
1562         vt->part_variances.none.variance > (9 * avg_64x64) >> 5)
1563       force_split[0] = PART_EVAL_ONLY_SPLIT;
1564 
1565     if (!is_key_frame &&
1566         (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
1567         max_var_64x64 > thresholds[0] >> 1)
1568       force_split[0] = PART_EVAL_ONLY_SPLIT;
1569   }
1570 
1571   if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
1572       !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
1573                            thresholds[0], BLOCK_16X16, force_split[0])) {
1574     for (m = 0; m < num_64x64_blocks; ++m) {
1575       const int x64_idx = ((m & 1) << 4);
1576       const int y64_idx = ((m >> 1) << 4);
1577       const int m2 = m << 2;
1578 
1579       // Now go through the entire structure, splitting every block size until
1580       // we get to one that's got a variance lower than our threshold.
1581       if (!set_vt_partitioning(cpi, xd, tile, &vt->split[m], BLOCK_64X64,
1582                                mi_row + y64_idx, mi_col + x64_idx,
1583                                thresholds[1], BLOCK_16X16,
1584                                force_split[1 + m])) {
1585         for (i = 0; i < 4; ++i) {
1586           const int x32_idx = ((i & 1) << 3);
1587           const int y32_idx = ((i >> 1) << 3);
1588           const int i2 = (m2 + i) << 2;
1589           if (!set_vt_partitioning(cpi, xd, tile, &vt->split[m].split[i],
1590                                    BLOCK_32X32, (mi_row + y64_idx + y32_idx),
1591                                    (mi_col + x64_idx + x32_idx), thresholds[2],
1592                                    BLOCK_16X16, force_split[5 + m2 + i])) {
1593             for (j = 0; j < 4; ++j) {
1594               const int x16_idx = ((j & 1) << 2);
1595               const int y16_idx = ((j >> 1) << 2);
1596               const int split_index = 21 + i2 + j;
1597               // For inter frames: if variance4x4downsample[] == 1 for this
1598               // 16x16 block, then the variance is based on 4x4 down-sampling,
1599               // so use vt2 in set_vt_partitioning(), otherwise use vt.
1600               VP16x16 *vtemp =
1601                   (!is_key_frame && variance4x4downsample[i2 + j] == 1)
1602                       ? &vt2[i2 + j]
1603                       : &vt->split[m].split[i].split[j];
1604               if (!set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
1605                                        mi_row + y64_idx + y32_idx + y16_idx,
1606                                        mi_col + x64_idx + x32_idx + x16_idx,
1607                                        thresholds[3], BLOCK_8X8,
1608                                        force_split[split_index])) {
1609                 for (k = 0; k < 4; ++k) {
1610                   const int x8_idx = (k & 1) << 1;
1611                   const int y8_idx = (k >> 1) << 1;
1612                   set_block_size(
1613                       cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
1614                       (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
1615                       BLOCK_8X8);
1616                 }
1617               }
1618             }
1619           }
1620         }
1621       }
1622     }
1623   }
1624 
1625   if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
1626     set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
1627                           ref_frame_partition, mi_col, mi_row);
1628   }
1629 
1630   if (vt2) aom_free(vt2);
1631   if (vt) aom_free(vt);
1632 #if CONFIG_COLLECT_COMPONENT_TIMING
1633   end_timing(cpi, choose_var_based_partitioning_time);
1634 #endif
1635   return 0;
1636 }
1637