1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13
14 #include "av1/common/reconintra.h"
15
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/speed_features.h"
18 #include "av1/encoder/rdopt.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21
22 #define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
23 // Max speed setting for tx domain evaluation
24 #define MAX_TX_DOMAIN_EVAL_SPEED 5
25 static MESH_PATTERN
26 good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
27 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
28 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
29 { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
30 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
31 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
32 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
33 };
34
35 // TODO(huisu@google.com): These settings are pretty relaxed, tune them for
36 // each speed setting
37 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
38 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
39 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
40 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
41 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
42 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
43 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
44 };
45
46 // Threshold values to be used for pruning the txfm_domain_distortion
47 // based on block MSE
48 // Index 0: Default mode evaluation, Winner mode processing is not
49 // applicable (Eg : IntraBc). Index 1: Mode evaluation.
50 // Index 2: Winner mode evaluation. Index 1 and 2 are applicable when
51 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
52 // TODO(any): Experiment the threshold logic based on variance metric
53 static unsigned int tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
54 { UINT_MAX, UINT_MAX, UINT_MAX },
55 { 22026, 22026, 22026 },
56 { 1377, 1377, 1377 },
57 { 0, 0, 0 }
58 };
59
60 // Number of different levels of aggressiveness in using transform domain
61 // distortion during the R-D evaluation based on the speed feature
62 // tx_domain_dist_level.
63 #define TX_DOMAIN_DIST_LEVELS 4
64
65 // Transform domain distortion type to be used for default, mode and winner mode
66 // evaluation Index 0: Default mode evaluation, Winner mode processing is not
67 // applicable (Eg : IntraBc). Index 1: Mode evaluation. Index 2: Winner mode
68 // evaluation. Index 1 and 2 are applicable when
69 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
70 static unsigned int
71 tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
72 { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
73 };
74
75 // Threshold values to be used for disabling coeff RD-optimization
76 // based on block MSE / qstep^2.
77 // TODO(any): Experiment the threshold logic based on variance metric.
78 // Table has satd and dist threshold value index 0 : dist,index 1: satd
79 // For each row, the indices are as follows.
80 // Index 0: Default mode evaluation, Winner mode processing is not applicable
81 // (Eg : IntraBc)
82 // Index 1: Mode evaluation.
83 // Index 2: Winner mode evaluation.
84 // Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed
85 // feature is ON
86 // There are 7 levels with increasing speed, mapping to vertical indices.
87 static unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
88 { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
89 { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
90 { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
91 { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
92 { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
93 { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
94 { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
95 { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
96 { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
97 };
98
99 // Transform size to be used for default, mode and winner mode evaluation
100 // Index 0: Default mode evaluation, Winner mode processing is not applicable
101 // (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation.
102 // Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed
103 // feature is ON
104 static TX_SIZE_SEARCH_METHOD tx_size_search_methods[4][MODE_EVAL_TYPES] = {
105 { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
106 { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
107 { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD },
108 { USE_LARGESTALL, USE_LARGESTALL, USE_LARGESTALL }
109 };
110
111 // Predict transform skip levels to be used for default, mode and winner mode
112 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
113 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
114 // Values indicate the aggressiveness of skip flag prediction.
115 // 0 : no early skip prediction
116 // 1 : conservative early skip prediction using DCT_DCT
117 // 2 : early skip prediction based on SSE
118 static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
119 { 1, 1, 1 },
120 { 1, 2, 1 } };
121
122 // Predict skip or DC block level used during transform type search. It is
123 // indexed using the following:
124 // First index : Speed feature 'dc_blk_pred_level' (0 to 3)
125 // Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and
126 // WINNER_MODE_EVAL).
127 //
128 // The values of predict_dc_levels[][] indicate the aggressiveness of predicting
129 // a block as transform skip or DC only.
130 // Type 0 : No skip block or DC only block prediction
131 // Type 1 : Prediction of skip block based on residual mean and variance
132 // Type 2 : Prediction of skip block or DC only block based on residual mean and
133 // variance
134 static unsigned int predict_dc_levels[4][MODE_EVAL_TYPES] = {
135 { 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
136 };
137
138 #if !CONFIG_FPMT_TEST
139 // This table holds the maximum number of reference frames for global motion.
140 // The table is indexed as per the speed feature 'gm_search_type'.
141 // 0 : All reference frames are allowed.
142 // 1 : All reference frames except L2 and L3 are allowed.
143 // 2 : All reference frames except L2, L3 and ARF2 are allowed.
144 // 3 : No reference frame is allowed.
145 static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
146 INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
147 };
148 #endif
149
150 // Qindex threshold levels used for selecting full-pel motion search.
151 // ms_qthresh[i][j][k] indicates the qindex boundary value for 'k'th qindex band
152 // for resolution index 'j' for aggressiveness level 'i'.
153 // Aggressiveness increases from i = 0 to 2.
154 // j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
155 // Currently invoked only for speed 0, 1 and 2.
156 static int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
157 { { 170, 50 }, { MAXQ, 200 } },
158 { { 170, 40 }, { 200, 40 } } };
159
160 // Full-pel search methods for aggressive search based on qindex.
161 // Index 0 is for resolutions lower than 720p, index 1 for 720p or larger
162 // resolutions. Currently invoked only for speed 1 and 2.
163 static SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND, DIAMOND };
164
165 // Intra only frames, golden frames (except alt ref overlays) and
166 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)167 static int frame_is_boosted(const AV1_COMP *cpi) {
168 return frame_is_kf_gf_arf(cpi);
169 }
170
// Sets the frame-size dependent speed features for all-intra encoding.
//
// cpi   : encoder instance. Only the frame dimensions (cpi->common) and
//         cpi->oxcf.use_highbitdepth are read here.
// sf    : speed feature set, updated in place.
// speed : speed preset. The "speed >= N" sections below are cumulative and run
//         in increasing order, so a higher section overrides values written by
//         a lower one.
static void set_allintra_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  // Resolution classes are decided by the smaller of the two frame dimensions.
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_dim >= 480;
  const int is_720p_or_larger = min_dim >= 720;
  const int is_1080p_or_larger = min_dim >= 1080;
  const int is_4k_or_larger = min_dim >= 2160;
  const bool use_hbd = cpi->oxcf.use_highbitdepth;

  // Settings applied at every speed.
  if (is_480p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    if (is_720p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED;
    } else {
      sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
    }
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (use_hbd) {
      sf->tx_sf.prune_tx_size_level = 1;
    }
  }

  if (is_4k_or_larger) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
  }

  if (is_720p_or_larger) {
    // TODO(chiyotsai@google.com): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 1;
  } else {
    // TODO(huisu@google.com): train models for 720P and above.
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  if (speed >= 1) {
    if (is_720p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    } else if (is_480p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    } else {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    }

    if (!is_720p_or_larger) {
      // Same as the speed 0 values except for the BLOCK_64X64 entry.
      sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
      sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
      sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
      sf->part_sf.ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
      sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
    }
    sf->part_sf.ml_early_term_after_part_split_level = 2;
  }

  if (speed >= 2) {
    // Every resolution below 720p uses BLOCK_32X32 at this level.
    if (is_720p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    } else {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    }

    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
    } else {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
      sf->part_sf.partition_search_breakout_rate_thr = 100;
    }

    if (is_480p_or_larger) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
      if (use_hbd) {
        sf->tx_sf.prune_tx_size_level = 2;
      }
    } else {
      if (use_hbd) {
        sf->tx_sf.prune_tx_size_level = 3;
      }
    }
  }

  if (speed >= 3) {
    sf->part_sf.ml_early_term_after_part_split_level = 0;

    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
      sf->part_sf.partition_search_breakout_rate_thr = 200;
    } else {
      sf->part_sf.max_intra_bsize = BLOCK_32X32;
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
    }
    if (use_hbd) {
      sf->tx_sf.prune_tx_size_level = 3;
    }
  }

  if (speed >= 4) {
    if (is_720p_or_larger) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
    } else {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
    }

    if (is_480p_or_larger) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
    }
  }

  if (speed >= 6) {
    // Below 480p the value chosen in the speed-independent section above
    // (DIRECT_PRED) is left in place.
    if (is_720p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
    } else if (is_480p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    }

    if (is_1080p_or_larger) {
      sf->part_sf.default_min_partition_size = BLOCK_8X8;
    }

    sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
  }

  if (speed >= 7) {
    // TODO(kyslov): add more speed features to control speed/quality
  }

  if (speed >= 8) {
    if (!is_480p_or_larger) {
      sf->rt_sf.nonrd_check_partition_merge_mode = 2;
    }
    if (is_720p_or_larger) {
      sf->rt_sf.force_large_partition_blocks_intra = 1;
    }
  }

  if (speed >= 9) {
    // TODO(kyslov): add more speed features to control speed/quality
    if (!is_4k_or_larger) {
      sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_OFF;
      sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_OFF;
    }
  }
}
317
// Sets the frame-size independent speed features for all-intra encoding.
//
// cpi   : encoder instance. Reads cpi->common.features
//         .allow_screen_content_tools, cpi->oxcf.use_highbitdepth,
//         cpi->twopass_frame.fr_content_type and
//         cpi->use_screen_content_tools.
// sf    : speed feature set, updated in place.
// speed : speed preset. The "speed >= N" sections below are cumulative and run
//         in increasing order, so a higher section overrides values written by
//         a lower one.
static void set_allintra_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;

  // Settings applied at every speed.
  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;
  sf->intra_sf.prune_luma_palette_size_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation and screen content get a lower base threshold.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;
    sf->part_sf.reuse_best_prediction_for_part_ab = 1;

    // Doubles the base threshold chosen above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;

    sf->intra_sf.prune_palette_search_level = 1;
    sf->intra_sf.prune_luma_palette_size_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 3;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = 2;
    sf->rd_sf.tx_domain_dist_level = 1;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;
  }

  if (speed >= 2) {
    sf->mv_sf.auto_mv_step_size = 1;

    sf->intra_sf.disable_smooth_intra = 1;
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.prune_filter_intra_level = 1;

    sf->rd_sf.perform_coeff_opt = 3;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL1;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.search_method = DIAMOND;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    // Already 2 from the speed >= 1 section; restated here explicitly.
    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = 0;
    sf->lpf_sf.reduce_wiener_window_size = 1;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    sf->part_sf.ml_predict_breakout_level = 3;

    sf->intra_sf.prune_chroma_modes_using_luma_winner = 1;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;

    sf->rd_sf.perform_coeff_opt = 5;
    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;

    sf->mv_sf.reduce_search_range = 1;

    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  }

  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 1 : 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    sf->lpf_sf.use_coarse_filter_level_search = 0;
    sf->lpf_sf.disable_lr_filter = 1;

    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
  }

  if (speed >= 6) {
    sf->intra_sf.prune_smooth_intra_mode_for_chroma = 1;
    sf->intra_sf.prune_filter_intra_level = 2;
    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    sf->intra_sf.cfl_search_range = 1;
    sf->intra_sf.top_intra_model_count_allowed = 2;
    sf->intra_sf.adapt_top_model_rd_count_using_neighbors = 1;
    sf->intra_sf.prune_luma_odd_delta_angles_in_intra = 1;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 0 : 1;
    sf->part_sf.prune_part4_search = 3;
    // TODO(jingning): This might not be a good trade off if the
    // target image quality is very low.
    sf->part_sf.default_max_partition_size = BLOCK_32X32;

    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
    sf->tx_sf.prune_intra_tx_depths_using_nn = true;

    sf->rd_sf.perform_coeff_opt = 6;
    sf->rd_sf.tx_domain_dist_level = 3;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
    sf->winner_mode_sf.dc_blk_pred_level = 1;
  }

  // The following should make all-intra mode speed 7 approximately equal
  // to real-time speed 6,
  // all-intra speed 8 close to real-time speed 7, and all-intra speed 9
  // close to real-time speed 8
  if (speed >= 7) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.var_part_split_threshold_shift = 7;
  }

  if (speed >= 8) {
    sf->rt_sf.hybrid_intra_pickmode = 1;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 1;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    // Set mask for intra modes: DC only for BLOCK_32X32 and above in the
    // block size ordering; DC, H, V intra modes for block sizes < 32X32.
    for (int i = 0; i < BLOCK_SIZES; ++i) {
      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] =
          (i >= BLOCK_32X32) ? INTRA_DC : INTRA_DC_H_V;
    }
  }

  if (speed >= 9) {
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;

    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.hybrid_intra_pickmode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
    sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
  }

  // As the speed feature prune_chroma_modes_using_luma_winner already
  // constrains the number of chroma directional mode evaluations to a maximum
  // of 1, the HOG computation and the associated pruning logic does not seem to
  // help speed-up the chroma mode evaluations. Hence disable the speed feature
  // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is
  // enabled.
  if (sf->intra_sf.prune_chroma_modes_using_luma_winner) {
    sf->intra_sf.chroma_intra_pruning_with_hog = 0;
  }
}
569
set_good_speed_feature_framesize_dependent(const AV1_COMP * const cpi,SPEED_FEATURES * const sf,int speed)570 static void set_good_speed_feature_framesize_dependent(
571 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
572 const AV1_COMMON *const cm = &cpi->common;
573 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
574 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
575 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
576 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
577 const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
578 const bool use_hbd = cpi->oxcf.use_highbitdepth;
579 const int boosted = frame_is_boosted(cpi);
580 const int is_boosted_arf2_bwd_type =
581 boosted ||
582 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
583 const int is_lf_frame =
584 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == LF_UPDATE;
585
586 if (is_480p_or_larger) {
587 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
588 if (is_720p_or_larger)
589 sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED;
590 else
591 sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
592 } else {
593 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
594 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
595 if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
596 }
597
598 if (is_4k_or_larger) {
599 sf->part_sf.default_min_partition_size = BLOCK_8X8;
600 }
601
602 // TODO(huisu@google.com): train models for 720P and above.
603 if (!is_720p_or_larger) {
604 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
605 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
606 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
607 sf->part_sf.ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64
608 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
609 sf->part_sf.ml_early_term_after_part_split_level = 1;
610 }
611
612 if (is_720p_or_larger) {
613 // TODO(chiyotsai@google.com): make this speed feature adaptive based on
614 // current block's vertical texture instead of hardcoded with resolution
615 sf->mv_sf.use_downsampled_sad = 1;
616 }
617
618 if (!is_720p_or_larger) {
619 const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
620 const int rate_tolerance =
621 AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct);
622 sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2);
623 }
624
625 if (speed >= 1) {
626 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1;
627
628 if (is_720p_or_larger) {
629 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
630 } else if (is_480p_or_larger) {
631 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
632 } else {
633 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
634 }
635
636 if (!is_720p_or_larger) {
637 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
638 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
639 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
640 sf->part_sf.ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64
641 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
642 }
643 sf->part_sf.ml_early_term_after_part_split_level = 2;
644
645 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
646 }
647
648 if (speed >= 2) {
649 if (is_720p_or_larger) {
650 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
651 } else if (is_480p_or_larger) {
652 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
653 } else {
654 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
655 }
656
657 if (is_720p_or_larger) {
658 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
659 sf->part_sf.partition_search_breakout_rate_thr = 120;
660 } else {
661 sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
662 sf->part_sf.partition_search_breakout_rate_thr = 100;
663 }
664
665 if (is_720p_or_larger) {
666 sf->inter_sf.prune_obmc_prob_thresh = 16;
667 } else {
668 sf->inter_sf.prune_obmc_prob_thresh = 8;
669 }
670
671 if (is_480p_or_larger) {
672 sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
673 } else {
674 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
675 }
676
677 if (is_480p_or_lesser) sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
678
679 if (is_720p_or_larger) {
680 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 1 : 0;
681 } else {
682 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 2 : 0;
683 }
684
685 if (is_480p_or_larger) {
686 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
687 if (use_hbd) sf->tx_sf.prune_tx_size_level = 2;
688 } else {
689 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
690 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = boosted ? 0 : 1;
691 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = boosted ? 0 : 1;
692 }
693
694 if (!is_720p_or_larger) {
695 sf->mv_sf.disable_second_mv = 1;
696 sf->mv_sf.auto_mv_step_size = 2;
697 } else {
698 sf->mv_sf.disable_second_mv = boosted ? 0 : 2;
699 sf->mv_sf.auto_mv_step_size = 1;
700 }
701
702 if (!is_720p_or_larger) {
703 sf->hl_sf.recode_tolerance = 50;
704 sf->inter_sf.disable_interinter_wedge_newmv_search =
705 is_boosted_arf2_bwd_type ? 0 : 1;
706 sf->inter_sf.enable_fast_wedge_mask_search = 1;
707 }
708 }
709
710 if (speed >= 3) {
711 sf->inter_sf.enable_fast_wedge_mask_search = 1;
712 sf->inter_sf.skip_newmv_in_drl = 2;
713 sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
714 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 3 : 0;
715 sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
716 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
717 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch =
718 frame_is_intra_only(&cpi->common) ? 0 : 1;
719
720 sf->part_sf.ml_early_term_after_part_split_level = 0;
721
722 if (is_720p_or_larger) {
723 sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
724 sf->part_sf.partition_search_breakout_rate_thr = 200;
725 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 2 : 0;
726 } else {
727 sf->part_sf.max_intra_bsize = BLOCK_32X32;
728 sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
729 sf->part_sf.partition_search_breakout_rate_thr = 120;
730 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 1 : 0;
731 }
732 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
733
734 if (is_480p_or_larger) {
735 sf->part_sf.early_term_after_none_split = 1;
736 } else {
737 sf->part_sf.early_term_after_none_split = 0;
738 }
739 if (is_720p_or_larger) {
740 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 2;
741 } else {
742 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 3;
743 }
744
745 if (is_720p_or_larger) {
746 sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
747 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 1;
748 } else {
749 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
750 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
751 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL2;
752 }
753
754 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
755 }
756
757 if (speed >= 4) {
758 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
759 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
760 if (is_720p_or_larger) {
761 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
762 } else {
763 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
764 }
765 sf->part_sf.early_term_after_none_split = 1;
766
767 if (is_480p_or_larger) {
768 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
769 }
770
771 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
772 sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
773 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
774 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3;
775
776 if (is_720p_or_larger) {
777 sf->inter_sf.prune_comp_ref_frames = 1;
778 } else if (is_480p_or_larger) {
779 sf->inter_sf.prune_comp_ref_frames = is_boosted_arf2_bwd_type ? 0 : 1;
780 }
781
782 if (is_720p_or_larger)
783 sf->hl_sf.recode_tolerance = 32;
784 else
785 sf->hl_sf.recode_tolerance = 55;
786
787 sf->intra_sf.skip_intra_in_interframe = 4;
788
789 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;
790 }
791
792 if (speed >= 5) {
793 if (is_720p_or_larger) {
794 sf->inter_sf.prune_warped_prob_thresh = 16;
795 } else if (is_480p_or_larger) {
796 sf->inter_sf.prune_warped_prob_thresh = 8;
797 }
798 if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40;
799
800 sf->inter_sf.skip_newmv_in_drl = 4;
801 sf->inter_sf.prune_comp_ref_frames = 1;
802
803 if (!is_720p_or_larger) {
804 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
805 }
806
807 if (!is_480p_or_larger) {
808 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
809 boosted ? INT_MAX : 250;
810 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
811 }
812
813 if (is_480p_or_lesser) {
814 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL1;
815 } else {
816 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL2;
817 }
818 }
819
820 if (speed >= 6) {
821 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
822 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
823 sf->inter_sf.prune_comp_ref_frames = 2;
824 if (is_720p_or_larger) {
825 sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
826 } else if (is_480p_or_larger) {
827 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
828 }
829
830 if (is_1080p_or_larger) {
831 sf->part_sf.default_min_partition_size = BLOCK_8X8;
832 }
833
834 if (is_720p_or_larger) {
835 sf->inter_sf.disable_masked_comp = 1;
836 }
837
838 if (!is_720p_or_larger) {
839 sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
840 sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;
841 }
842
843 if (is_720p_or_larger) {
844 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
845 sf->part_sf.partition_search_breakout_dist_thr = (1 << 28);
846 } else {
847 sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
848 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
849 }
850
851 if (is_720p_or_larger) {
852 sf->inter_sf.prune_ref_mv_idx_search = 2;
853 } else {
854 sf->inter_sf.prune_ref_mv_idx_search = 1;
855 }
856
857 if (!is_720p_or_larger) {
858 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 150;
859 }
860
861 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
862
863 if (!is_480p_or_larger) sf->hl_sf.num_frames_used_in_tf = 3;
864 }
865 }
866
// Sets the "good quality" speed features whose values do not depend on the
// frame dimensions (this function never reads the frame width or height).
//
// The body is a cascade: an unconditional baseline is written first, then each
// `if (speed >= N)` tier (N = 1..6) overwrites or extends what the lower tiers
// assigned. Statement order is therefore significant, and a field may be
// written several times before the function returns.
//
// cpi:   encoder instance; only read here (boost status, GF group update and
//        frame types, screen content flags, bit depth, rate control state).
// sf:    speed feature set updated in place. Fields that are not assigned here
//        keep whatever value the caller stored in them.
// speed: speed preset; every tier with N <= speed is applied, in ascending
//        order.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boosted = frame_is_boosted(cpi);
  // True for boosted frames and for frames updated as internal ARFs.
  const int is_boosted_arf2_bwd_type =
      boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
  const int is_inter_frame =
      gf_group->frame_type[cpi->gf_frame_index] == INTER_FRAME;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;
  // Evaluated once; several tiers below gate features on the frame being
  // intra-only.
  const int intra_only = frame_is_intra_only(cm);

  // Only touched when large scale tile is off; speed >= 3 overrides it again.
  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.simple_motion_search_prune_agg =
      allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL0;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  sf->inter_sf.prune_ref_frame_for_rect_partitions =
      (boosted || (allow_screen_content_tools))
          ? 0
          : (is_boosted_arf2_bwd_type ? 1 : 2);
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation and screen content get a lower threshold than other
  // content. The speed 1 tier doubles whichever value is chosen here.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
    sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;

    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;

    // Doubles the baseline threshold selected above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1;
    // Unlike the baseline, the zero case here is keyed on intra-only frames
    // rather than on boosted frames.
    sf->inter_sf.prune_ref_frame_for_rect_partitions =
        (intra_only || (allow_screen_content_tools)) ? 0 : (boosted ? 1 : 2);
    sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_arf_compound = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  if (speed >= 2) {
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->fp_sf.skip_motion_search_threshold = 25;

    sf->gm_sf.disable_gm_search_based_on_stats = 1;

    sf->part_sf.reuse_best_prediction_for_part_ab = !intra_only;

    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_iters_per_step = 1;
    sf->mv_sf.reduce_search_range = 1;

    // TODO(chiyotsai@google.com): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
    sf->inter_sf.prune_ext_comp_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    // Enable fast search only for COMPOUND_DIFFWTD type.
    sf->inter_sf.enable_fast_compound_mode_search = 1;
    sf->inter_sf.reuse_mask_search_results = 1;
    sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 1;
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
    sf->inter_sf.alt_ref_search_fp = 1;

    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    // Stays 0 only for an intra-only frame with frames_to_key <= 1.
    sf->intra_sf.disable_smooth_intra =
        !intra_only || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.skip_intra_in_interframe = is_inter_frame ? 2 : 1;
    sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

    sf->tpl_sf.prune_starting_mv = 1;
    sf->tpl_sf.search_method = DIAMOND;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 3 : 4;
    sf->rd_sf.use_mb_rd_hash = 1;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
    sf->lpf_sf.disable_loop_restoration_chroma = boosted ? 0 : 1;
    sf->lpf_sf.reduce_wiener_window_size = boosted ? 0 : 1;

    // TODO(any): Re-evaluate this feature set to 1 in speed 2.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  if (speed >= 3) {
    // Unconditional, so it also replaces the LAST_MV_DATA choice made at the
    // top of the function.
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;

    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
    sf->gm_sf.prune_zero_mv_with_sse = 1;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools
            ? SIMPLE_AGG_LVL0
            : (boosted ? SIMPLE_AGG_LVL1 : QIDX_BASED_AGG_LVL1);
    sf->part_sf.prune_ext_part_using_split_info = 1;
    sf->part_sf.simple_motion_search_rect_split = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;
    sf->mv_sf.disable_second_mv = 2;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_1;

    sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.disable_onesided_comp = 1;
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.skip_repeated_ref_mv = 1;
    sf->inter_sf.reuse_compound_type_decision = 1;
    sf->inter_sf.txfm_rd_gate_level =
        boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;

    sf->interp_sf.adaptive_interp_filter_search = 2;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 2;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.gop_length_decision_method = 1;

    sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    // 0 for boosted frames, 1 for internal ARF updates, 2 otherwise.
    sf->winner_mode_sf.motion_mode_for_winner_cand =
        boosted ? 0
        : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE ? 1
                                                                         : 2;
    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 4;

    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
    sf->lpf_sf.use_coarse_filter_level_search = intra_only ? 0 : 1;
    sf->lpf_sf.use_downsampled_wiener_stats = 1;
  }

  if (speed >= 4) {
    sf->gm_sf.prune_zero_mv_with_sse = 2;

    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.ml_predict_breakout_level = 3;
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (allow_screen_content_tools || intra_only) ? 0 : 1;

    sf->inter_sf.alt_ref_search_fp = 2;
    sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 3;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;

    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4.
    // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    sf->intra_sf.skip_intra_in_interframe = 4;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 5 : 7;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        intra_only ? MULTI_WINNER_MODE_DEFAULT : MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 2;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
  }

  if (speed >= 5) {
    sf->fp_sf.reduce_mv_step_param = 4;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.prune_sub_8x8_partition_level =
        (allow_screen_content_tools || intra_only) ? 0 : 2;

    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 4;
    // Enable fast search for all valid compound modes.
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        intra_only ? MULTI_WINNER_MODE_FAST : MULTI_WINNER_MODE_OFF;

    sf->lpf_sf.disable_lr_filter = 1;

    sf->tpl_sf.prune_starting_mv = 3;
    sf->tpl_sf.use_y_only_rate_distortion = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.gop_length_decision_method = 2;

    // From this tier on the level no longer depends on `boosted`.
    sf->winner_mode_sf.dc_blk_pred_level = 2;

    sf->fp_sf.disable_recon = 1;
  }

  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;
    sf->hl_sf.recode_tolerance = 55;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
    sf->inter_sf.selective_ref_frame = 6;
    sf->inter_sf.prune_ext_comp_using_neighbors = 3;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    // Only the 32x32 and 64x64 entries are narrowed here; the TX_16X16 uv
    // entry keeps its speed 4 value.
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_sf.early_term_chroma_palette_size_search = 1;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        boosted || allow_screen_content_tools ? 0 : 2;
    // 0 for screen content, 1 for intra-only frames, 2 otherwise.
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 0 : intra_only ? 1 : 2;
    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;
    sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;

    sf->tpl_sf.gop_length_decision_method = 3;
    sf->tpl_sf.disable_filtered_key_tpl = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 3;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->fp_sf.skip_zeromv_motion_search = 1;
  }
}
1252
// Applies the real-time speed feature settings that depend on the frame
// dimensions, followed by overrides for SVC / externally configured reference
// structures and for screen content.
//
// Resolution buckets are derived from the smaller of the frame width and
// height (thresholds 360, 480, 720 and 1080). The sections run in a fixed
// order and later sections overwrite fields written by earlier ones (for
// example reduce_mv_pel_precision_highmotion is assigned in the 360p, 720p,
// 1080p, SVC and screen sections), so statement order is significant.
//
// cpi:   encoder instance; only read here.
// sf:    speed feature set updated in place. Some decisions also read values
//        already stored in sf->rt_sf (use_nonrd_altref_frame,
//        ref_frame_comp_nonrd).
// speed: real-time speed preset. Tests written as `speed == N` apply to that
//        preset only, while `speed >= N` also applies to every faster one.
static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
                                                     SPEED_FEATURES *const sf,
                                                     int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int boosted = frame_is_boosted(cpi);
  // Smaller frame dimension, computed once for all resolution tests below.
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_1080p_or_larger = min_dim >= 1080;
  const int is_720p_or_larger = min_dim >= 720;
  const int is_480p_or_larger = min_dim >= 480;
  const int is_360p_or_larger = min_dim >= 360;

  if (!is_360p_or_larger) {
    sf->rt_sf.prune_intra_mode_based_on_mv_range = 1;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
    if (speed >= 6)
      sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
    if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 2;
    if (speed >= 7) {
      sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
      sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
      sf->rt_sf.use_rtc_tf = 2;
    }
    if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1;
    if (speed >= 8) {
      sf->rt_sf.use_nonrd_filter_search = 1;
      sf->rt_sf.tx_size_level_based_on_qstep = 1;
    }
    if (speed >= 9) {
      sf->rt_sf.use_comp_ref_nonrd = 0;
      sf->rt_sf.nonrd_aggressive_skip = 1;
      sf->rt_sf.skip_intra_pred = 1;
      // Only turn on enable_ref_short_signaling for low resolution when only
      // LAST and GOLDEN ref frames are used.
      sf->rt_sf.enable_ref_short_signaling =
          (!sf->rt_sf.use_nonrd_altref_frame &&
           (!sf->rt_sf.use_comp_ref_nonrd ||
            (!sf->rt_sf.ref_frame_comp_nonrd[1] &&
             !sf->rt_sf.ref_frame_comp_nonrd[2])));

// TODO(kyslov) Re-enable when AV1 models are trained
#if 0
#if CONFIG_RT_ML_PARTITIONING
      if (!frame_is_intra_only(cm)) {
        sf->part_sf.partition_search_type = ML_BASED_PARTITION;
        sf->rt_sf.reuse_inter_pred_nonrd = 0;
      }
#endif
#endif
      sf->rt_sf.use_adaptive_subpel_search = false;
    }
    if (speed >= 10) {
      sf->rt_sf.skip_intra_pred = 2;
      sf->rt_sf.hybrid_intra_pickmode = 3;
      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
      // Reverts the value set by the speed >= 8 test above.
      sf->rt_sf.use_nonrd_filter_search = 0;
    }
  } else {
    sf->rt_sf.prune_intra_mode_based_on_mv_range = 2;
    sf->intra_sf.skip_filter_intra_in_inter_frames = 1;
    if (speed <= 5) {
      sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
          boosted ? INT_MAX : 350;
      sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
    }
    if (speed == 6) sf->part_sf.disable_8x8_part_based_on_qidx = 1;
    if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 2;
    if (speed == 7) {
      sf->rt_sf.prefer_large_partition_blocks = 1;
      // Enable this feature for [360p, 720p] resolution range initially.
      if (!cpi->rc.rtc_external_ratectrl && min_dim <= 720)
        sf->hl_sf.accurate_bit_estimate = cpi->oxcf.q_cfg.aq_mode == NO_AQ;
    }
    if (speed >= 7) {
      sf->rt_sf.use_rtc_tf = 1;
    }
    if (speed == 8 && !cpi->ppi->use_svc) {
      sf->rt_sf.short_circuit_low_temp_var = 0;
      sf->rt_sf.use_nonrd_altref_frame = 1;
    }
    if (speed >= 8) sf->rt_sf.tx_size_level_based_on_qstep = 2;
    if (speed >= 9) {
      sf->rt_sf.gf_length_lvl = 1;
      sf->rt_sf.skip_cdef_sb = 1;
      sf->rt_sf.sad_based_adp_altref_lag = 2;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
      sf->rt_sf.use_adaptive_subpel_search = true;
      sf->interp_sf.cb_pred_filter_search = 1;
    }
    if (speed >= 10) {
      sf->rt_sf.hybrid_intra_pickmode = 2;
      sf->rt_sf.sad_based_adp_altref_lag = 4;
      sf->rt_sf.tx_size_level_based_on_qstep = 0;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
      sf->rt_sf.use_adaptive_subpel_search = false;
      sf->interp_sf.cb_pred_filter_search = 2;
    }
  }
  if (!is_480p_or_larger) {
    if (speed == 7) {
      sf->rt_sf.nonrd_check_partition_merge_mode = 2;
    }
    if (speed >= 8) {
      sf->rt_sf.estimate_motion_for_var_based_partition = 1;
    }
    // Net effect: the feature ends up 1 only at speed 8.
    if (speed >= 9) {
      sf->rt_sf.estimate_motion_for_var_based_partition = 0;
    }
  }
  if (!is_720p_or_larger) {
    if (speed >= 9) {
      sf->rt_sf.force_large_partition_blocks_intra = 1;
    }
  } else {
    if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
    if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
    if (speed >= 7) {
      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
    }
    if (speed >= 9) {
      sf->rt_sf.sad_based_adp_altref_lag = 1;
      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
    }
    if (speed >= 10) {
      sf->rt_sf.sad_based_adp_altref_lag = 3;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
    }
  }
  // TODO(Any): Check/Tune settings of other sfs for 1080p.
  if (is_1080p_or_larger) {
    if (speed >= 7) {
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
      sf->rt_sf.use_adaptive_subpel_search = false;
    }
    if (speed >= 9) sf->interp_sf.cb_pred_filter_search = 0;
  } else {
    if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
    if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
  }

  // Setting for SVC, or when the ref_frame_config control is
  // used to set the reference structure.
  if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) {
    const RTC_REF *const rtc_ref = &cpi->ppi->rtc_ref;
    // For SVC: for greater than 2 temporal layers, use better mv search on
    // base temporal layers, and only on base spatial layer if highest
    // resolution is above 640x360.
    if (cpi->svc.number_temporal_layers > 2 &&
        cpi->svc.temporal_layer_id == 0 &&
        (cpi->svc.spatial_layer_id == 0 ||
         cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
             640 * 360)) {
      sf->mv_sf.search_method = NSTEP;
      sf->mv_sf.subpel_search_method = SUBPEL_TREE;
      sf->rt_sf.fullpel_search_step_param = 6;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
    }
    if (speed >= 8) {
      sf->rt_sf.disable_cdf_update_non_reference_frame = true;
      // Overrides the 0 assigned just above when both conditions hold.
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
      if (rtc_ref->non_reference_frame) {
        sf->rt_sf.nonrd_aggressive_skip = 1;
        sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
      }
    }
    if (speed <= 9 && cpi->svc.number_temporal_layers > 2 &&
        cpi->svc.temporal_layer_id == 0)
      sf->rt_sf.check_only_zero_zeromv_on_large_blocks = false;
    else
      sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
    // Compound mode enabling.
    if (rtc_ref->ref_frame_comp[0] || rtc_ref->ref_frame_comp[1] ||
        rtc_ref->ref_frame_comp[2]) {
      sf->rt_sf.use_comp_ref_nonrd = 1;
      // Each compound pair is enabled only if it was requested and its second
      // reference (GOLDEN, LAST2, ALTREF respectively) is flagged in
      // rtc_ref->reference.
      sf->rt_sf.ref_frame_comp_nonrd[0] =
          rtc_ref->ref_frame_comp[0] && rtc_ref->reference[GOLDEN_FRAME - 1];
      sf->rt_sf.ref_frame_comp_nonrd[1] =
          rtc_ref->ref_frame_comp[1] && rtc_ref->reference[LAST2_FRAME - 1];
      sf->rt_sf.ref_frame_comp_nonrd[2] =
          rtc_ref->ref_frame_comp[2] && rtc_ref->reference[ALTREF_FRAME - 1];
    } else {
      sf->rt_sf.use_comp_ref_nonrd = 0;
    }

    if (cpi->svc.number_spatial_layers > 1 ||
        cpi->svc.number_temporal_layers > 1)
      sf->hl_sf.accurate_bit_estimate = 0;
  }
  // Screen settings.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
    // TODO(marpan): Check settings for speed 7 and 8.
    if (speed >= 7) {
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
      sf->mv_sf.use_bsize_dependent_search_method = 0;
    }
    if (speed >= 8) {
      sf->rt_sf.nonrd_check_partition_merge_mode = 3;
      sf->rt_sf.nonrd_prune_ref_frame_search = 1;
      sf->rt_sf.use_nonrd_filter_search = 0;
      sf->rt_sf.prune_hv_pred_modes_using_src_sad = false;
    }
    if (speed >= 9) {
      sf->rt_sf.prune_idtx_nonrd = 1;
      sf->rt_sf.part_early_exit_zeromv = 2;
      sf->rt_sf.skip_lf_screen = 1;
      sf->rt_sf.nonrd_prune_ref_frame_search = 3;
      sf->rt_sf.var_part_split_threshold_shift = 10;
      sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
      sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
      sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
      sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
      sf->rt_sf.nonrd_check_partition_merge_mode = 0;
      sf->interp_sf.cb_pred_filter_search = 0;
    }
    if (speed >= 10) {
      if (cm->width * cm->height > 1920 * 1080)
        sf->part_sf.disable_8x8_part_based_on_qidx = 1;
      sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
      sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
      sf->rt_sf.part_early_exit_zeromv = 1;
      sf->rt_sf.nonrd_aggressive_skip = 1;
    }
    // The remaining screen settings apply at every speed.
    sf->rt_sf.use_nonrd_altref_frame = 0;
    sf->rt_sf.skip_cdef_sb = 1;
    sf->rt_sf.use_rtc_tf = 0;
    sf->rt_sf.use_comp_ref_nonrd = 0;
    sf->rt_sf.source_metrics_sb_nonrd = 1;
    if (cpi->rc.high_source_sad == 1) {
      sf->rt_sf.prefer_large_partition_blocks = 0;
      sf->part_sf.max_intra_bsize = BLOCK_128X128;
      // Block sizes whose enum value is above BLOCK_32X32 get INTRA_DC; all
      // others get INTRA_DC_H_V.
      for (int i = 0; i < BLOCK_SIZES; ++i) {
        if (i > BLOCK_32X32)
          sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
        else
          sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
      }
    }
    if (cpi->rc.max_block_source_sad > 20000 &&
        cpi->rc.frame_source_sad > 100 &&
        cpi->rc.percent_blocks_with_motion > 1 && speed >= 6) {
      sf->mv_sf.search_method = NSTEP;
      sf->rt_sf.fullpel_search_step_param = 2;
    }
    sf->rt_sf.partition_direct_merging = 0;
    sf->hl_sf.accurate_bit_estimate = 0;
  }
}
1502
1503 // TODO(kyslov): now this is very similar to
1504 // set_good_speed_features_framesize_independent
1505 // except it sets non-rd flag on speed 8. This function will likely
1506 // be modified in the future with RT-specific speed features.
set_rt_speed_features_framesize_independent(AV1_COMP * cpi,SPEED_FEATURES * sf,int speed)1507 static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
1508 SPEED_FEATURES *sf,
1509 int speed) {
1510 AV1_COMMON *const cm = &cpi->common;
1511 const int boosted = frame_is_boosted(cpi);
1512
1513 // Currently, rt speed 0, 1, 2, 3, 4, 5 are the same.
1514 // Following set of speed features are not impacting encoder's decisions as
1515 // the relevant tools are disabled by default.
1516 sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
1517 sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
1518 sf->inter_sf.reuse_inter_intra_mode = 1;
1519 sf->inter_sf.prune_compound_using_single_ref = 0;
1520 sf->inter_sf.prune_comp_search_by_single_result = 2;
1521 sf->inter_sf.prune_comp_type_by_comp_avg = 2;
1522 sf->inter_sf.fast_wedge_sign_estimate = 1;
1523 sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
1524 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
1525 sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
1526 sf->interp_sf.cb_pred_filter_search = 0;
1527 sf->part_sf.ml_prune_partition = 1;
1528 sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
1529 sf->part_sf.prune_ext_partition_types_search_level = 2;
1530 sf->part_sf.less_rectangular_check_level = 2;
1531 sf->mv_sf.obmc_full_pixel_search_level = 1;
1532 sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
1533 sf->tx_sf.model_based_prune_tx_search_level = 0;
1534 sf->lpf_sf.dual_sgr_penalty_level = 1;
1535 sf->lpf_sf.disable_lr_filter = 1;
1536 sf->rt_sf.skip_interp_filter_search = 1;
1537 sf->intra_sf.prune_palette_search_level = 2;
1538 sf->intra_sf.prune_luma_palette_size_search_level = 2;
1539 sf->intra_sf.early_term_chroma_palette_size_search = 1;
1540
1541 // End of set
1542
1543 // TODO(any, yunqing): tune these features for real-time use cases.
1544 sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
1545 sf->hl_sf.frame_parameter_update = 0;
1546
1547 sf->inter_sf.model_based_post_interp_filter_breakout = 1;
1548 // TODO(any): As per the experiments, this speed feature is doing redundant
1549 // computation since the model rd based pruning logic is similar to model rd
1550 // based gating when inter_mode_rd_model_estimation = 2. Enable this SF if
1551 // either of the condition becomes true.
1552 // (1) inter_mode_rd_model_estimation != 2
1553 // (2) skip_interp_filter_search == 0
1554 // (3) Motion mode or compound mode is enabled */
1555 sf->inter_sf.prune_mode_search_simple_translation = 0;
1556 sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
1557 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
1558 sf->inter_sf.selective_ref_frame = 4;
1559 sf->inter_sf.alt_ref_search_fp = 2;
1560 sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 4;
1561 sf->inter_sf.limit_txfm_eval_per_mode = 3;
1562
1563 sf->inter_sf.adaptive_rd_thresh = 4;
1564 sf->inter_sf.inter_mode_rd_model_estimation = 2;
1565 sf->inter_sf.prune_inter_modes_if_skippable = 1;
1566 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
1567 sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
1568 sf->inter_sf.skip_newmv_in_drl = 4;
1569
1570 sf->interp_sf.use_fast_interpolation_filter_search = 1;
1571 sf->interp_sf.use_interp_filter = 1;
1572 sf->interp_sf.adaptive_interp_filter_search = 1;
1573 sf->interp_sf.disable_dual_filter = 1;
1574
1575 sf->part_sf.default_max_partition_size = BLOCK_128X128;
1576 sf->part_sf.default_min_partition_size = BLOCK_8X8;
1577 sf->part_sf.use_best_rd_for_pruning = 1;
1578 sf->part_sf.early_term_after_none_split = 1;
1579 sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
1580 sf->part_sf.max_intra_bsize = BLOCK_16X16;
1581 sf->part_sf.partition_search_breakout_rate_thr = 500;
1582 sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
1583 sf->part_sf.adjust_var_based_rd_partitioning = 2;
1584
1585 sf->mv_sf.full_pixel_search_level = 1;
1586 sf->mv_sf.exhaustive_searches_thresh = INT_MAX;
1587 sf->mv_sf.auto_mv_step_size = 1;
1588 sf->mv_sf.subpel_iters_per_step = 1;
1589 sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
1590 sf->mv_sf.search_method = FAST_DIAMOND;
1591 sf->mv_sf.subpel_force_stop = EIGHTH_PEL;
1592 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
1593
1594 for (int i = 0; i < TX_SIZES; ++i) {
1595 sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
1596 sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
1597 }
1598 sf->intra_sf.skip_intra_in_interframe = 5;
1599 sf->intra_sf.disable_smooth_intra = 1;
1600 sf->intra_sf.skip_filter_intra_in_inter_frames = 1;
1601
1602 sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
1603 sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
1604 sf->tx_sf.adaptive_txb_search_level = 2;
1605 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
1606 sf->tx_sf.tx_size_search_lgr_block = 1;
1607 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
1608 sf->tx_sf.tx_type_search.skip_tx_search = 1;
1609 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
1610 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
1611 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
1612 sf->tx_sf.refine_fast_tx_search_results = 0;
1613 sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
1614 sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
1615 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
1616
1617 sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
1618 sf->rd_sf.simple_model_rd_from_var = 1;
1619 sf->rd_sf.tx_domain_dist_level = 2;
1620 sf->rd_sf.tx_domain_dist_thres_level = 2;
1621
1622 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
1623 sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
1624
1625 sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 3;
1626 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
1627 sf->winner_mode_sf.tx_size_search_level = 1;
1628 sf->winner_mode_sf.winner_mode_ifs = 1;
1629
1630 sf->rt_sf.check_intra_pred_nonrd = 1;
1631 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1632 sf->rt_sf.hybrid_intra_pickmode = 1;
1633 sf->rt_sf.use_comp_ref_nonrd = 0;
1634 sf->rt_sf.ref_frame_comp_nonrd[0] = 0;
1635 sf->rt_sf.ref_frame_comp_nonrd[1] = 0;
1636 sf->rt_sf.ref_frame_comp_nonrd[2] = 0;
1637 sf->rt_sf.use_nonrd_filter_search = 1;
1638 sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
1639 sf->rt_sf.num_inter_modes_for_tx_search = 5;
1640 sf->rt_sf.prune_inter_modes_using_temp_var = 1;
1641 sf->rt_sf.use_real_time_ref_set = 1;
1642 sf->rt_sf.use_simple_rd_model = 1;
1643 sf->rt_sf.prune_inter_modes_with_golden_ref = boosted ? 0 : 1;
1644 // TODO(any): This sf could be removed.
1645 sf->rt_sf.short_circuit_low_temp_var = 1;
1646 sf->rt_sf.check_scene_detection = 1;
1647 if (cpi->rc.rtc_external_ratectrl) sf->rt_sf.check_scene_detection = 0;
1648 if (cm->current_frame.frame_type != KEY_FRAME &&
1649 cpi->oxcf.rc_cfg.mode == AOM_CBR)
1650 sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
1651 // Enable noise estimation only for high resolutions for now.
1652 //
1653 // Since use_temporal_noise_estimate has no effect for all-intra frame
1654 // encoding, it is disabled for this case.
1655 if (cpi->oxcf.kf_cfg.key_freq_max != 0 && cm->width * cm->height > 640 * 480)
1656 sf->rt_sf.use_temporal_noise_estimate = 1;
1657 sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
1658 sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
1659 sf->rt_sf.mode_search_skip_flags =
1660 (cm->current_frame.frame_type == KEY_FRAME)
1661 ? 0
1662 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
1663 FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
1664 FLAG_EARLY_TERMINATE;
1665 sf->rt_sf.var_part_split_threshold_shift = 5;
1666 if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 1;
1667
1668 if (speed >= 6) {
1669 sf->mv_sf.use_fullpel_costlist = 1;
1670
1671 sf->rd_sf.tx_domain_dist_thres_level = 3;
1672
1673 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 0;
1674 sf->inter_sf.limit_inter_mode_cands = 4;
1675 sf->inter_sf.prune_warped_prob_thresh = 8;
1676 sf->inter_sf.extra_prune_warped = 1;
1677
1678 sf->rt_sf.gf_refresh_based_on_qp = 1;
1679 sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
1680 sf->rt_sf.var_part_split_threshold_shift = 7;
1681 if (!frame_is_intra_only(&cpi->common))
1682 sf->rt_sf.var_part_based_on_qidx = 2;
1683
1684 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 3;
1685 }
1686
1687 if (speed >= 7) {
1688 sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_1;
1689 sf->rt_sf.use_comp_ref_nonrd = 1;
1690 sf->rt_sf.ref_frame_comp_nonrd[2] = 1; // LAST_ALTREF
1691 sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2;
1692 sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
1693 sf->part_sf.max_intra_bsize = BLOCK_32X32;
1694
1695 sf->mv_sf.search_method = FAST_DIAMOND;
1696 sf->mv_sf.subpel_force_stop = QUARTER_PEL;
1697
1698 sf->inter_sf.inter_mode_rd_model_estimation = 2;
1699 // This sf is not applicable in non-rd path.
1700 sf->inter_sf.skip_newmv_in_drl = 0;
1701
1702 // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't
1703 // good. May need more study.
1704 for (int i = 0; i < TX_SIZES; ++i) {
1705 sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
1706 }
1707
1708 sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
1709 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL5;
1710
1711 sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
1712 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
1713 // This is for rd path only.
1714 sf->rt_sf.prune_inter_modes_using_temp_var = 0;
1715 sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
1716 sf->rt_sf.prune_intra_mode_based_on_mv_range = 0;
1717 #if !CONFIG_REALTIME_ONLY
1718 sf->rt_sf.reuse_inter_pred_nonrd =
1719 (cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0);
1720 #else
1721 sf->rt_sf.reuse_inter_pred_nonrd = 1;
1722 #endif
1723 #if CONFIG_AV1_TEMPORAL_DENOISING
1724 sf->rt_sf.reuse_inter_pred_nonrd = (cpi->oxcf.noise_sensitivity == 0);
1725 #endif
1726 sf->rt_sf.short_circuit_low_temp_var = 0;
1727 sf->rt_sf.skip_interp_filter_search = 0;
1728 // For spatial layers, only LAST and GOLDEN are currently used in the SVC
1729 // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
1730 // get_ref_frame_flags() for some patterns, so disable it here for
1731 // spatial layers.
1732 sf->rt_sf.use_nonrd_altref_frame =
1733 (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
1734 sf->rt_sf.use_nonrd_pick_mode = 1;
1735 sf->rt_sf.nonrd_check_partition_merge_mode = 3;
1736 sf->rt_sf.skip_intra_pred = 1;
1737 sf->rt_sf.source_metrics_sb_nonrd = 1;
1738 // Set mask for intra modes.
1739 for (int i = 0; i < BLOCK_SIZES; ++i)
1740 if (i >= BLOCK_32X32)
1741 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1742 else
1743 // Use DC, H, V intra mode for block sizes < 32X32.
1744 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
1745
1746 sf->winner_mode_sf.dc_blk_pred_level = 0;
1747 sf->rt_sf.var_part_based_on_qidx = 3;
1748 sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
1749 sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
1750 sf->rt_sf.skip_compound_based_on_var = true;
1751 sf->rt_sf.use_adaptive_subpel_search = true;
1752 }
1753
1754 if (speed >= 8) {
1755 sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_2;
1756 sf->intra_sf.intra_pruning_with_hog = 1;
1757 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1758 sf->rt_sf.short_circuit_low_temp_var = 1;
1759 sf->rt_sf.use_nonrd_altref_frame = 0;
1760 sf->rt_sf.nonrd_prune_ref_frame_search = 2;
1761 sf->rt_sf.nonrd_check_partition_merge_mode = 0;
1762 sf->rt_sf.var_part_split_threshold_shift = 8;
1763 sf->rt_sf.var_part_based_on_qidx = 4;
1764 sf->rt_sf.partition_direct_merging = 1;
1765 sf->rt_sf.prune_compoundmode_with_singlemode_var = false;
1766 sf->mv_sf.use_bsize_dependent_search_method = 2;
1767 sf->rt_sf.prune_hv_pred_modes_using_src_sad = true;
1768 }
1769 if (speed >= 9) {
1770 sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
1771 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1772 sf->rt_sf.prefer_large_partition_blocks = 3;
1773 sf->rt_sf.skip_intra_pred = 2;
1774 sf->rt_sf.var_part_split_threshold_shift = 9;
1775 for (int i = 0; i < BLOCK_SIZES; ++i)
1776 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1777 sf->rt_sf.var_part_based_on_qidx = 0;
1778 sf->rt_sf.frame_level_mode_cost_update = true;
1779 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
1780 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
1781 sf->rt_sf.use_adaptive_subpel_search = true;
1782 sf->mv_sf.use_bsize_dependent_search_method = 0;
1783 }
1784 if (speed >= 10) {
1785 sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
1786 sf->rt_sf.nonrd_prune_ref_frame_search = 3;
1787 sf->rt_sf.var_part_split_threshold_shift = 10;
1788 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1789 }
1790 }
1791
init_hl_sf(HIGH_LEVEL_SPEED_FEATURES * hl_sf)1792 static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
1793 // best quality defaults
1794 hl_sf->frame_parameter_update = 1;
1795 hl_sf->recode_loop = ALLOW_RECODE;
1796 // Recode loop tolerance %.
1797 hl_sf->recode_tolerance = 25;
1798 hl_sf->high_precision_mv_usage = CURRENT_Q;
1799 hl_sf->superres_auto_search_type = SUPERRES_AUTO_ALL;
1800 hl_sf->disable_extra_sc_testing = 0;
1801 hl_sf->second_alt_ref_filtering = 1;
1802 hl_sf->num_frames_used_in_tf = INT_MAX;
1803 hl_sf->accurate_bit_estimate = 0;
1804 }
1805
init_fp_sf(FIRST_PASS_SPEED_FEATURES * fp_sf)1806 static AOM_INLINE void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
1807 fp_sf->reduce_mv_step_param = 3;
1808 fp_sf->skip_motion_search_threshold = 0;
1809 fp_sf->disable_recon = 0;
1810 fp_sf->skip_zeromv_motion_search = 0;
1811 }
1812
init_tpl_sf(TPL_SPEED_FEATURES * tpl_sf)1813 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
1814 tpl_sf->gop_length_decision_method = 0;
1815 tpl_sf->prune_intra_modes = 0;
1816 tpl_sf->prune_starting_mv = 0;
1817 tpl_sf->reduce_first_step_size = 0;
1818 tpl_sf->skip_alike_starting_mv = 0;
1819 tpl_sf->subpel_force_stop = EIGHTH_PEL;
1820 tpl_sf->search_method = NSTEP;
1821 tpl_sf->disable_filtered_key_tpl = 0;
1822 tpl_sf->prune_ref_frames_in_tpl = 0;
1823 tpl_sf->allow_compound_pred = 1;
1824 tpl_sf->use_y_only_rate_distortion = 0;
1825 }
1826
init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES * gm_sf)1827 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
1828 gm_sf->gm_search_type = GM_FULL_SEARCH;
1829 gm_sf->prune_ref_frame_for_gm_search = 0;
1830 gm_sf->prune_zero_mv_with_sse = 0;
1831 gm_sf->disable_gm_search_based_on_stats = 0;
1832 }
1833
init_part_sf(PARTITION_SPEED_FEATURES * part_sf)1834 static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
1835 part_sf->partition_search_type = SEARCH_PARTITION;
1836 part_sf->less_rectangular_check_level = 0;
1837 part_sf->use_square_partition_only_threshold = BLOCK_128X128;
1838 part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
1839 part_sf->default_max_partition_size = BLOCK_LARGEST;
1840 part_sf->default_min_partition_size = BLOCK_4X4;
1841 part_sf->adjust_var_based_rd_partitioning = 0;
1842 part_sf->max_intra_bsize = BLOCK_LARGEST;
1843 // This setting only takes effect when partition_search_type is set
1844 // to FIXED_PARTITION.
1845 part_sf->fixed_partition_size = BLOCK_16X16;
1846 // Recode loop tolerance %.
1847 part_sf->partition_search_breakout_dist_thr = 0;
1848 part_sf->partition_search_breakout_rate_thr = 0;
1849 part_sf->prune_ext_partition_types_search_level = 0;
1850 part_sf->prune_part4_search = 0;
1851 part_sf->ml_prune_partition = 0;
1852 part_sf->ml_early_term_after_part_split_level = 0;
1853 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
1854 part_sf->ml_partition_search_breakout_thresh[i] =
1855 -1; // -1 means not enabled.
1856 }
1857 part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0;
1858 part_sf->simple_motion_search_split = 0;
1859 part_sf->simple_motion_search_prune_rect = 0;
1860 part_sf->simple_motion_search_early_term_none = 0;
1861 part_sf->simple_motion_search_reduce_search_steps = 0;
1862 part_sf->intra_cnn_based_part_prune_level = 0;
1863 part_sf->ext_partition_eval_thresh = BLOCK_8X8;
1864 part_sf->rect_partition_eval_thresh = BLOCK_128X128;
1865 part_sf->prune_ext_part_using_split_info = 0;
1866 part_sf->prune_rectangular_split_based_on_qidx = 0;
1867 part_sf->early_term_after_none_split = 0;
1868 part_sf->ml_predict_breakout_level = 0;
1869 part_sf->prune_sub_8x8_partition_level = 0;
1870 part_sf->simple_motion_search_rect_split = 0;
1871 part_sf->reuse_prev_rd_results_for_part_ab = 0;
1872 part_sf->reuse_best_prediction_for_part_ab = 0;
1873 part_sf->use_best_rd_for_pruning = 0;
1874 part_sf->skip_non_sq_part_based_on_none = 0;
1875 part_sf->disable_8x8_part_based_on_qidx = 0;
1876 }
1877
init_mv_sf(MV_SPEED_FEATURES * mv_sf)1878 static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
1879 mv_sf->full_pixel_search_level = 0;
1880 mv_sf->auto_mv_step_size = 0;
1881 mv_sf->exhaustive_searches_thresh = 0;
1882 mv_sf->obmc_full_pixel_search_level = 0;
1883 mv_sf->prune_mesh_search = PRUNE_MESH_SEARCH_DISABLED;
1884 mv_sf->reduce_search_range = 0;
1885 mv_sf->search_method = NSTEP;
1886 mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL;
1887 mv_sf->subpel_force_stop = EIGHTH_PEL;
1888 mv_sf->subpel_iters_per_step = 2;
1889 mv_sf->subpel_search_method = SUBPEL_TREE;
1890 mv_sf->use_accurate_subpel_search = USE_8_TAPS;
1891 mv_sf->use_bsize_dependent_search_method = 0;
1892 mv_sf->use_fullpel_costlist = 0;
1893 mv_sf->use_downsampled_sad = 0;
1894 mv_sf->disable_extensive_joint_motion_search = 0;
1895 mv_sf->disable_second_mv = 0;
1896 mv_sf->skip_fullpel_search_using_startmv = 0;
1897 }
1898
init_inter_sf(INTER_MODE_SPEED_FEATURES * inter_sf)1899 static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
1900 inter_sf->adaptive_rd_thresh = 0;
1901 inter_sf->model_based_post_interp_filter_breakout = 0;
1902 inter_sf->reduce_inter_modes = 0;
1903 inter_sf->alt_ref_search_fp = 0;
1904 inter_sf->prune_comp_ref_frames = 0;
1905 inter_sf->selective_ref_frame = 0;
1906 inter_sf->prune_ref_frame_for_rect_partitions = 0;
1907 inter_sf->fast_wedge_sign_estimate = 0;
1908 inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
1909 inter_sf->reuse_inter_intra_mode = 0;
1910 inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB;
1911 inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB;
1912 inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB;
1913 inter_sf->prune_inter_modes_based_on_tpl = 0;
1914 inter_sf->prune_nearmv_using_neighbors = PRUNE_NEARMV_OFF;
1915 inter_sf->prune_comp_search_by_single_result = 0;
1916 inter_sf->skip_repeated_ref_mv = 0;
1917 inter_sf->skip_newmv_in_drl = 0;
1918 inter_sf->inter_mode_rd_model_estimation = 0;
1919 inter_sf->prune_compound_using_single_ref = 0;
1920 inter_sf->prune_ext_comp_using_neighbors = 0;
1921 inter_sf->skip_ext_comp_nearmv_mode = 0;
1922 inter_sf->prune_comp_using_best_single_mode_ref = 0;
1923 inter_sf->prune_nearest_near_mv_using_refmv_weight = 0;
1924 inter_sf->disable_onesided_comp = 0;
1925 inter_sf->prune_mode_search_simple_translation = 0;
1926 inter_sf->prune_comp_type_by_comp_avg = 0;
1927 inter_sf->disable_interinter_wedge_newmv_search = 0;
1928 inter_sf->fast_interintra_wedge_search = 0;
1929 inter_sf->prune_comp_type_by_model_rd = 0;
1930 inter_sf->perform_best_rd_based_gating_for_chroma = 0;
1931 inter_sf->prune_obmc_prob_thresh = 0;
1932 inter_sf->disable_interinter_wedge_var_thresh = 0;
1933 inter_sf->disable_interintra_wedge_var_thresh = 0;
1934 inter_sf->prune_ref_mv_idx_search = 0;
1935 inter_sf->prune_warped_prob_thresh = 0;
1936 inter_sf->reuse_compound_type_decision = 0;
1937 inter_sf->txfm_rd_gate_level = 0;
1938 inter_sf->prune_inter_modes_if_skippable = 0;
1939 inter_sf->disable_masked_comp = 0;
1940 inter_sf->enable_fast_compound_mode_search = 0;
1941 inter_sf->reuse_mask_search_results = 0;
1942 inter_sf->enable_fast_wedge_mask_search = 0;
1943 inter_sf->inter_mode_txfm_breakout = 0;
1944 inter_sf->limit_inter_mode_cands = 0;
1945 inter_sf->limit_txfm_eval_per_mode = 0;
1946 inter_sf->skip_arf_compound = 0;
1947 }
1948
init_interp_sf(INTERP_FILTER_SPEED_FEATURES * interp_sf)1949 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
1950 interp_sf->adaptive_interp_filter_search = 0;
1951 interp_sf->cb_pred_filter_search = 0;
1952 interp_sf->disable_dual_filter = 0;
1953 interp_sf->skip_sharp_interp_filter_search = 0;
1954 interp_sf->use_fast_interpolation_filter_search = 0;
1955 interp_sf->use_interp_filter = 0;
1956 }
1957
init_intra_sf(INTRA_MODE_SPEED_FEATURES * intra_sf)1958 static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
1959 intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB;
1960 intra_sf->skip_intra_in_interframe = 1;
1961 intra_sf->intra_pruning_with_hog = 0;
1962 intra_sf->chroma_intra_pruning_with_hog = 0;
1963 intra_sf->prune_palette_search_level = 0;
1964 intra_sf->prune_luma_palette_size_search_level = 0;
1965
1966 for (int i = 0; i < TX_SIZES; i++) {
1967 intra_sf->intra_y_mode_mask[i] = INTRA_ALL;
1968 intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
1969 }
1970 intra_sf->disable_smooth_intra = 0;
1971 intra_sf->prune_smooth_intra_mode_for_chroma = 0;
1972 intra_sf->prune_filter_intra_level = 0;
1973 intra_sf->prune_chroma_modes_using_luma_winner = 0;
1974 intra_sf->cfl_search_range = 3;
1975 intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT;
1976 intra_sf->adapt_top_model_rd_count_using_neighbors = 0;
1977 intra_sf->early_term_chroma_palette_size_search = 0;
1978 intra_sf->skip_filter_intra_in_inter_frames = 0;
1979 intra_sf->prune_luma_odd_delta_angles_in_intra = 0;
1980 }
1981
init_tx_sf(TX_SPEED_FEATURES * tx_sf)1982 static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
1983 tx_sf->inter_tx_size_search_init_depth_sqr = 0;
1984 tx_sf->inter_tx_size_search_init_depth_rect = 0;
1985 tx_sf->intra_tx_size_search_init_depth_rect = 0;
1986 tx_sf->intra_tx_size_search_init_depth_sqr = 0;
1987 tx_sf->tx_size_search_lgr_block = 0;
1988 tx_sf->model_based_prune_tx_search_level = 0;
1989 tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1;
1990 tx_sf->tx_type_search.ml_tx_split_thresh = 8500;
1991 tx_sf->tx_type_search.use_skip_flag_prediction = 1;
1992 tx_sf->tx_type_search.use_reduced_intra_txset = 0;
1993 tx_sf->tx_type_search.fast_intra_tx_type_search = 0;
1994 tx_sf->tx_type_search.fast_inter_tx_type_prob_thresh = INT_MAX;
1995 tx_sf->tx_type_search.skip_tx_search = 0;
1996 tx_sf->tx_type_search.prune_tx_type_using_stats = 0;
1997 tx_sf->tx_type_search.prune_tx_type_est_rd = 0;
1998 tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;
1999 tx_sf->txb_split_cap = 1;
2000 tx_sf->adaptive_txb_search_level = 0;
2001 tx_sf->refine_fast_tx_search_results = 1;
2002 tx_sf->prune_tx_size_level = 0;
2003 tx_sf->prune_intra_tx_depths_using_nn = false;
2004 }
2005
init_rd_sf(RD_CALC_SPEED_FEATURES * rd_sf,const AV1EncoderConfig * oxcf)2006 static AOM_INLINE void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
2007 const AV1EncoderConfig *oxcf) {
2008 const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant;
2009 if (disable_trellis_quant == 3) {
2010 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
2011 ? NO_ESTIMATE_YRD_TRELLIS_OPT
2012 : NO_TRELLIS_OPT;
2013 } else if (disable_trellis_quant == 2) {
2014 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
2015 ? FINAL_PASS_TRELLIS_OPT
2016 : NO_TRELLIS_OPT;
2017 } else if (disable_trellis_quant == 0) {
2018 if (is_lossless_requested(&oxcf->rc_cfg)) {
2019 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
2020 } else {
2021 rd_sf->optimize_coefficients = FULL_TRELLIS_OPT;
2022 }
2023 } else if (disable_trellis_quant == 1) {
2024 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
2025 } else {
2026 assert(0 && "Invalid disable_trellis_quant value");
2027 }
2028 rd_sf->use_mb_rd_hash = 0;
2029 rd_sf->simple_model_rd_from_var = 0;
2030 rd_sf->tx_domain_dist_level = 0;
2031 rd_sf->tx_domain_dist_thres_level = 0;
2032 rd_sf->perform_coeff_opt = 0;
2033 }
2034
init_winner_mode_sf(WINNER_MODE_SPEED_FEATURES * winner_mode_sf)2035 static AOM_INLINE void init_winner_mode_sf(
2036 WINNER_MODE_SPEED_FEATURES *winner_mode_sf) {
2037 winner_mode_sf->motion_mode_for_winner_cand = 0;
2038 // Set this at the appropriate speed levels
2039 winner_mode_sf->tx_size_search_level = 0;
2040 winner_mode_sf->enable_winner_mode_for_coeff_opt = 0;
2041 winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
2042 winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;
2043 winner_mode_sf->multi_winner_mode_type = 0;
2044 winner_mode_sf->dc_blk_pred_level = 0;
2045 winner_mode_sf->winner_mode_ifs = 0;
2046 winner_mode_sf->prune_winner_mode_eval_level = 0;
2047 }
2048
init_lpf_sf(LOOP_FILTER_SPEED_FEATURES * lpf_sf)2049 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
2050 lpf_sf->disable_loop_restoration_chroma = 0;
2051 lpf_sf->disable_loop_restoration_luma = 0;
2052 lpf_sf->prune_wiener_based_on_src_var = 0;
2053 lpf_sf->prune_sgr_based_on_wiener = 0;
2054 lpf_sf->enable_sgr_ep_pruning = 0;
2055 lpf_sf->reduce_wiener_window_size = 0;
2056 lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
2057 lpf_sf->use_coarse_filter_level_search = 0;
2058 lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;
2059 // Set decoder side speed feature to use less dual sgr modes
2060 lpf_sf->dual_sgr_penalty_level = 0;
2061 lpf_sf->disable_lr_filter = 0;
2062 lpf_sf->use_downsampled_wiener_stats = 0;
2063 }
2064
init_rt_sf(REAL_TIME_SPEED_FEATURES * rt_sf)2065 static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
2066 rt_sf->check_intra_pred_nonrd = 0;
2067 rt_sf->skip_intra_pred = 0;
2068 rt_sf->estimate_motion_for_var_based_partition = 0;
2069 rt_sf->nonrd_check_partition_merge_mode = 0;
2070 rt_sf->nonrd_check_partition_split = 0;
2071 rt_sf->mode_search_skip_flags = 0;
2072 rt_sf->nonrd_prune_ref_frame_search = 0;
2073 rt_sf->use_nonrd_pick_mode = 0;
2074 rt_sf->use_nonrd_altref_frame = 0;
2075 rt_sf->use_comp_ref_nonrd = 0;
2076 rt_sf->use_real_time_ref_set = 0;
2077 rt_sf->short_circuit_low_temp_var = 0;
2078 rt_sf->reuse_inter_pred_nonrd = 0;
2079 rt_sf->num_inter_modes_for_tx_search = INT_MAX;
2080 rt_sf->use_nonrd_filter_search = 0;
2081 rt_sf->use_simple_rd_model = 0;
2082 rt_sf->skip_interp_filter_search = 0;
2083 rt_sf->hybrid_intra_pickmode = 0;
2084 rt_sf->source_metrics_sb_nonrd = 0;
2085 rt_sf->overshoot_detection_cbr = NO_DETECTION;
2086 rt_sf->check_scene_detection = 0;
2087 rt_sf->prefer_large_partition_blocks = 0;
2088 rt_sf->use_temporal_noise_estimate = 0;
2089 rt_sf->fullpel_search_step_param = 0;
2090 for (int i = 0; i < BLOCK_SIZES; ++i)
2091 rt_sf->intra_y_mode_bsize_mask_nrd[i] = INTRA_ALL;
2092 rt_sf->prune_hv_pred_modes_using_src_sad = false;
2093 rt_sf->nonrd_aggressive_skip = 0;
2094 rt_sf->skip_cdef_sb = 0;
2095 rt_sf->force_large_partition_blocks_intra = 0;
2096 rt_sf->skip_tx_no_split_var_based_partition = 0;
2097 rt_sf->skip_newmv_mode_based_on_sse = 0;
2098 rt_sf->gf_length_lvl = 0;
2099 rt_sf->prune_inter_modes_with_golden_ref = 0;
2100 rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
2101 rt_sf->prune_inter_modes_using_temp_var = 0;
2102 rt_sf->reduce_mv_pel_precision_highmotion = 0;
2103 rt_sf->reduce_mv_pel_precision_lowcomplex = 0;
2104 rt_sf->prune_intra_mode_based_on_mv_range = 0;
2105 rt_sf->var_part_split_threshold_shift = 7;
2106 rt_sf->gf_refresh_based_on_qp = 0;
2107 rt_sf->use_rtc_tf = 0;
2108 rt_sf->prune_idtx_nonrd = 0;
2109 rt_sf->part_early_exit_zeromv = 0;
2110 rt_sf->sse_early_term_inter_search = EARLY_TERM_DISABLED;
2111 rt_sf->skip_lf_screen = 0;
2112 rt_sf->sad_based_adp_altref_lag = 0;
2113 rt_sf->partition_direct_merging = 0;
2114 rt_sf->var_part_based_on_qidx = 0;
2115 rt_sf->tx_size_level_based_on_qstep = 0;
2116 rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
2117 rt_sf->prune_compoundmode_with_singlecompound_var = false;
2118 rt_sf->frame_level_mode_cost_update = false;
2119 rt_sf->prune_h_pred_using_best_mode_so_far = false;
2120 rt_sf->check_only_zero_zeromv_on_large_blocks = false;
2121 rt_sf->disable_cdf_update_non_reference_frame = false;
2122 rt_sf->prune_compoundmode_with_singlemode_var = false;
2123 rt_sf->skip_compound_based_on_var = false;
2124 rt_sf->set_zeromv_skip_based_on_source_sad = 1;
2125 rt_sf->use_adaptive_subpel_search = false;
2126 rt_sf->screen_content_cdef_filter_qindex_thresh = 0;
2127 rt_sf->enable_ref_short_signaling = false;
2128 rt_sf->check_globalmv_on_single_ref = true;
2129 }
2130
2131 // Populate appropriate sub-pel search method based on speed feature and user
2132 // specified settings
set_subpel_search_method(MotionVectorSearchParams * mv_search_params,unsigned int motion_vector_unit_test,SUBPEL_SEARCH_METHODS subpel_search_method)2133 static void set_subpel_search_method(
2134 MotionVectorSearchParams *mv_search_params,
2135 unsigned int motion_vector_unit_test,
2136 SUBPEL_SEARCH_METHODS subpel_search_method) {
2137 if (subpel_search_method == SUBPEL_TREE) {
2138 mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
2139 } else if (subpel_search_method == SUBPEL_TREE_PRUNED) {
2140 mv_search_params->find_fractional_mv_step =
2141 av1_find_best_sub_pixel_tree_pruned;
2142 } else if (subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
2143 mv_search_params->find_fractional_mv_step =
2144 av1_find_best_sub_pixel_tree_pruned_more;
2145 } else {
2146 assert(0);
2147 }
2148
2149 // This is only used in motion vector unit test.
2150 if (motion_vector_unit_test == 1)
2151 mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
2152 else if (motion_vector_unit_test == 2)
2153 mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
2154 }
2155
// Applies the frame-size dependent speed features for the given speed.
//
// Steps, in order:
//   1. Dispatch to the GOOD / ALLINTRA / REALTIME specific setter according
//      to cpi->oxcf.mode (other modes dispatch to nothing).
//   2. Unless the sequence parameters are locked, mask the masked-compound
//      and inter-intra compound sequence flags with the speed features.
//   3. Re-select the sub-pel search routine from the resulting speed features.
//   4. With row_mt == 1 and more than one worker, demote an mv cost update
//      level of INTERNAL_COST_UPD_SBROW_SET to INTERNAL_COST_UPD_SBROW.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  if (oxcf->mode == GOOD) {
    set_good_speed_feature_framesize_dependent(cpi, sf, speed);
  } else if (oxcf->mode == ALLINTRA) {
    set_allintra_speed_feature_framesize_dependent(cpi, sf, speed);
  } else if (oxcf->mode == REALTIME) {
    set_rt_speed_feature_framesize_dependent(cpi, sf, speed);
  }

  if (!cpi->ppi->seq_params_locked) {
    const int masked_comp_allowed = !sf->inter_sf.disable_masked_comp;
    const int interintra_allowed =
        (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
    cpi->common.seq_params->enable_masked_compound &= masked_comp_allowed;
    cpi->common.seq_params->enable_interintra_compound &= interintra_allowed;
  }

  set_subpel_search_method(&cpi->mv_search_params,
                           oxcf->unit_test_cfg.motion_vector_unit_test,
                           sf->mv_sf.subpel_search_method);

  // For the multi-thread use case with row_mt enabled, a cost update for a set
  // of SB rows is not desirable, so fall back to the row level update.
  if (oxcf->row_mt == 1 && cpi->mt_info.num_workers > 1 &&
      sf->inter_sf.mv_cost_upd_level == INTERNAL_COST_UPD_SBROW_SET) {
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  }
}
2193
av1_set_speed_features_framesize_independent(AV1_COMP * cpi,int speed)2194 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
2195 SPEED_FEATURES *const sf = &cpi->sf;
2196 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
2197 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2198 int i;
2199
2200 init_hl_sf(&sf->hl_sf);
2201 init_fp_sf(&sf->fp_sf);
2202 init_tpl_sf(&sf->tpl_sf);
2203 init_gm_sf(&sf->gm_sf);
2204 init_part_sf(&sf->part_sf);
2205 init_mv_sf(&sf->mv_sf);
2206 init_inter_sf(&sf->inter_sf);
2207 init_interp_sf(&sf->interp_sf);
2208 init_intra_sf(&sf->intra_sf);
2209 init_tx_sf(&sf->tx_sf);
2210 init_rd_sf(&sf->rd_sf, oxcf);
2211 init_winner_mode_sf(&sf->winner_mode_sf);
2212 init_lpf_sf(&sf->lpf_sf);
2213 init_rt_sf(&sf->rt_sf);
2214
2215 switch (oxcf->mode) {
2216 case GOOD:
2217 set_good_speed_features_framesize_independent(cpi, sf, speed);
2218 break;
2219 case ALLINTRA:
2220 set_allintra_speed_features_framesize_independent(cpi, sf, speed);
2221 break;
2222 case REALTIME:
2223 set_rt_speed_features_framesize_independent(cpi, sf, speed);
2224 break;
2225 }
2226
2227 // Note: when use_nonrd_pick_mode is true, the transform size is the
2228 // minimum of 16x16 and the largest possible size of the current block,
2229 // which conflicts with the speed feature "enable_tx_size_search".
2230 if (!oxcf->txfm_cfg.enable_tx_size_search &&
2231 sf->rt_sf.use_nonrd_pick_mode == 0) {
2232 sf->winner_mode_sf.tx_size_search_level = 3;
2233 }
2234
2235 if (!cpi->ppi->seq_params_locked) {
2236 cpi->common.seq_params->order_hint_info.enable_dist_wtd_comp &=
2237 (sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED);
2238 cpi->common.seq_params->enable_dual_filter &=
2239 !sf->interp_sf.disable_dual_filter;
2240 cpi->common.seq_params->enable_restoration &= !sf->lpf_sf.disable_lr_filter;
2241
2242 cpi->common.seq_params->enable_interintra_compound &=
2243 (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
2244 }
2245
2246 const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
2247 for (i = 0; i < MAX_MESH_STEP; ++i) {
2248 sf->mv_sf.mesh_patterns[i].range =
2249 good_quality_mesh_patterns[mesh_speed][i].range;
2250 sf->mv_sf.mesh_patterns[i].interval =
2251 good_quality_mesh_patterns[mesh_speed][i].interval;
2252 }
2253
2254 // Update the mesh pattern of exhaustive motion search for intraBC
2255 // Though intraBC mesh pattern is populated for all frame types, it is used
2256 // only for intra frames of screen contents
2257 for (i = 0; i < MAX_MESH_STEP; ++i) {
2258 sf->mv_sf.intrabc_mesh_patterns[i].range =
2259 intrabc_mesh_patterns[mesh_speed][i].range;
2260 sf->mv_sf.intrabc_mesh_patterns[i].interval =
2261 intrabc_mesh_patterns[mesh_speed][i].interval;
2262 }
2263
2264 // Slow quant, dct and trellis not worthwhile for first pass
2265 // so make sure they are always turned off.
2266 if (is_stat_generation_stage(cpi))
2267 sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
2268
2269 // No recode for 1 pass.
2270 if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi))
2271 sf->hl_sf.recode_loop = DISALLOW_RECODE;
2272
2273 set_subpel_search_method(&cpi->mv_search_params,
2274 cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
2275 sf->mv_sf.subpel_search_method);
2276
  // assert ensures that tx_domain_dist_thresholds is accessed correctly
2278 assert(cpi->sf.rd_sf.tx_domain_dist_thres_level >= 0 &&
2279 cpi->sf.rd_sf.tx_domain_dist_thres_level < 4);
2280 memcpy(winner_mode_params->tx_domain_dist_threshold,
2281 tx_domain_dist_thresholds[cpi->sf.rd_sf.tx_domain_dist_thres_level],
2282 sizeof(winner_mode_params->tx_domain_dist_threshold));
2283
2284 assert(cpi->sf.rd_sf.tx_domain_dist_level >= 0 &&
2285 cpi->sf.rd_sf.tx_domain_dist_level < TX_DOMAIN_DIST_LEVELS);
2286 memcpy(winner_mode_params->use_transform_domain_distortion,
2287 tx_domain_dist_types[cpi->sf.rd_sf.tx_domain_dist_level],
2288 sizeof(winner_mode_params->use_transform_domain_distortion));
2289
2290 // assert ensures that coeff_opt_thresholds is accessed correctly
2291 assert(cpi->sf.rd_sf.perform_coeff_opt >= 0 &&
2292 cpi->sf.rd_sf.perform_coeff_opt < 9);
2293 memcpy(winner_mode_params->coeff_opt_thresholds,
2294 &coeff_opt_thresholds[cpi->sf.rd_sf.perform_coeff_opt],
2295 sizeof(winner_mode_params->coeff_opt_thresholds));
2296
2297 // assert ensures that predict_skip_levels is accessed correctly
2298 assert(cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction >= 0 &&
2299 cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction < 3);
2300 memcpy(winner_mode_params->skip_txfm_level,
2301 predict_skip_levels[cpi->sf.tx_sf.tx_type_search
2302 .use_skip_flag_prediction],
2303 sizeof(winner_mode_params->skip_txfm_level));
2304
2305 // assert ensures that tx_size_search_level is accessed correctly
2306 assert(cpi->sf.winner_mode_sf.tx_size_search_level >= 0 &&
2307 cpi->sf.winner_mode_sf.tx_size_search_level <= 3);
2308 memcpy(winner_mode_params->tx_size_search_methods,
2309 tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level],
2310 sizeof(winner_mode_params->tx_size_search_methods));
2311 memcpy(winner_mode_params->predict_dc_level,
2312 predict_dc_levels[cpi->sf.winner_mode_sf.dc_blk_pred_level],
2313 sizeof(winner_mode_params->predict_dc_level));
2314
2315 if (cpi->oxcf.row_mt == 1 && (cpi->mt_info.num_workers > 1)) {
2316 if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
2317 // Revert to type 2
2318 sf->inter_sf.inter_mode_rd_model_estimation = 2;
2319 }
2320
2321 #if !CONFIG_FPMT_TEST
2322 // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
2323 // better parallelism when number of threads available are greater than or
2324 // equal to maximum number of reference frames allowed for global motion.
2325 if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
2326 (cpi->mt_info.num_workers >=
2327 gm_available_reference_frames[sf->gm_sf.gm_search_type]))
2328 sf->gm_sf.prune_ref_frame_for_gm_search = 0;
2329 #endif
2330 }
2331
2332 // This only applies to the real time mode. Adaptive gf refresh is disabled if
2333 // gf_cbr_boost_pct that is set by the user is larger than 0.
2334 if (cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 0)
2335 sf->rt_sf.gf_refresh_based_on_qp = 0;
2336 }
2337
2338 // Override some speed features based on qindex
av1_set_speed_features_qindex_dependent(AV1_COMP * cpi,int speed)2339 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
2340 AV1_COMMON *const cm = &cpi->common;
2341 SPEED_FEATURES *const sf = &cpi->sf;
2342 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
2343 const int boosted = frame_is_boosted(cpi);
2344 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
2345 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
2346 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
2347 const int is_arf2_bwd_type =
2348 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
2349
2350 if (cpi->oxcf.mode == REALTIME) {
2351 if (speed >= 6) {
2352 const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
2353 sf->part_sf.adjust_var_based_rd_partitioning =
2354 frame_is_intra_only(cm)
2355 ? 0
2356 : cm->quant_params.base_qindex > qindex_thresh;
2357 }
2358 return;
2359 }
2360
2361 if (speed == 0) {
2362 // qindex_thresh for resolution < 720p
2363 const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
2364 if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
2365 sf->part_sf.simple_motion_search_split =
2366 cm->features.allow_screen_content_tools ? 1 : 2;
2367 sf->part_sf.simple_motion_search_early_term_none = 1;
2368 sf->tx_sf.model_based_prune_tx_search_level = 0;
2369 }
2370
2371 if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) {
2372 sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger;
2373 memcpy(winner_mode_params->coeff_opt_thresholds,
2374 &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
2375 sizeof(winner_mode_params->coeff_opt_thresholds));
2376 sf->part_sf.simple_motion_search_split =
2377 cm->features.allow_screen_content_tools ? 1 : 2;
2378 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
2379 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
2380 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
2381 sf->tx_sf.model_based_prune_tx_search_level = 0;
2382
2383 if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
2384 sf->inter_sf.selective_ref_frame = 2;
2385 sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
2386 sf->rd_sf.tx_domain_dist_thres_level = 1;
2387 sf->part_sf.simple_motion_search_early_term_none = 1;
2388 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
2389 sf->interp_sf.cb_pred_filter_search = 0;
2390 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
2391 sf->tx_sf.tx_type_search.skip_tx_search = 1;
2392 }
2393 }
2394 }
2395
2396 if (speed >= 2) {
2397 // Disable extended partitions for lower quantizers
2398 const int aggr = AOMMIN(3, speed - 2);
2399 const int qindex_thresh1[4] = { 50, 50, 80, 100 };
2400 const int qindex_thresh2[4] = { 80, 100, 120, 160 };
2401 int qindex_thresh;
2402 int disable_ext_part;
2403 if (aggr <= 1) {
2404 const int qthresh2 =
2405 (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr];
2406 qindex_thresh = cm->features.allow_screen_content_tools
2407 ? qindex_thresh1[aggr]
2408 : qthresh2;
2409 disable_ext_part = !boosted;
2410 } else {
2411 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
2412 disable_ext_part = !frame_is_intra_only(cm);
2413 }
2414 if (cm->quant_params.base_qindex <= qindex_thresh && disable_ext_part) {
2415 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2416 }
2417 }
2418
2419 if (speed >= 4) {
2420 // Disable rectangular partitions for lower quantizers
2421 const int aggr = AOMMIN(1, speed - 4);
2422 const int qindex_thresh[2] = { 65, 80 };
2423 int disable_rect_part;
2424 disable_rect_part = !boosted;
2425 if (cm->quant_params.base_qindex <= qindex_thresh[aggr] &&
2426 disable_rect_part && is_480p_or_larger) {
2427 sf->part_sf.rect_partition_eval_thresh = BLOCK_8X8;
2428 }
2429 }
2430
2431 if (speed <= 2) {
2432 if (!is_stat_generation_stage(cpi)) {
2433 // Use faster full-pel motion search for high quantizers.
2434 // Also use reduced total search range for low resolutions at high
2435 // quantizers.
2436 const int aggr = speed;
2437 const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0];
2438 const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1];
2439 const SEARCH_METHODS search_method =
2440 motion_search_method[is_720p_or_larger];
2441 if (cm->quant_params.base_qindex > qindex_thresh1) {
2442 sf->mv_sf.search_method = search_method;
2443 sf->tpl_sf.search_method = search_method;
2444 } else if (cm->quant_params.base_qindex > qindex_thresh2) {
2445 sf->mv_sf.search_method = NSTEP_8PT;
2446 }
2447 }
2448 }
2449
2450 if (speed >= 4) {
2451 // Disable LR search at low and high quantizers and enable only for
2452 // mid-quantizer range.
2453 if (!boosted && !is_arf2_bwd_type) {
2454 const int qindex_low[2] = { 100, 60 };
2455 const int qindex_high[2] = { 180, 160 };
2456 if (cm->quant_params.base_qindex <= qindex_low[is_720p_or_larger] ||
2457 cm->quant_params.base_qindex > qindex_high[is_720p_or_larger]) {
2458 sf->lpf_sf.disable_loop_restoration_luma = 1;
2459 }
2460 }
2461 }
2462
2463 if (speed == 1) {
2464 // Reuse interinter wedge mask search from first search for non-boosted
2465 // non-internal-arf frames, except at very high quantizers.
2466 if (cm->quant_params.base_qindex <= 200) {
2467 if (!boosted && !is_arf2_bwd_type)
2468 sf->inter_sf.reuse_mask_search_results = 1;
2469 }
2470 }
2471
2472 set_subpel_search_method(&cpi->mv_search_params,
2473 cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
2474 sf->mv_sf.subpel_search_method);
2475 }
2476