1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13
14 #include "av1/common/reconintra.h"
15
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/speed_features.h"
18 #include "av1/encoder/rdopt.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21
#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
// Max speed setting for tx domain evaluation
#define MAX_TX_DOMAIN_EVAL_SPEED 5
// Mesh patterns table: one row per mesh speed (0 to MAX_MESH_SPEED), with
// MAX_MESH_STEP entries per row.
// NOTE(review): each entry is presumably { range, interval } as declared by
// MESH_PATTERN -- confirm against the type's declaration.
static MESH_PATTERN
    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
    };
34
// Mesh patterns table for intrabc: one row per mesh speed (0 to
// MAX_MESH_SPEED), with MAX_MESH_STEP entries per row. Only the first two
// entries of each row are non-zero.
// TODO(huisu@google.com): These settings are pretty relaxed, tune them for
// each speed setting
static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
};
45
// Threshold values to be used for pruning the txfm_domain_distortion
// based on block MSE.
// Each row holds one threshold per mode evaluation type:
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable (Eg : IntraBc).
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when the
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON.
// The four rows run from UINT_MAX down to 0.
// NOTE(review): the row is presumably selected by the speed feature
// tx_domain_dist_thres_level -- confirm at the point of use.
// TODO(any): Experiment the threshold logic based on variance metric
static unsigned int tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
  { UINT_MAX, UINT_MAX, UINT_MAX },
  { 22026, 22026, 22026 },
  { 1377, 1377, 1377 },
  { 0, 0, 0 }
};
59
// Number of different levels of aggressiveness in using transform domain
// distortion during the R-D evaluation based on the speed feature
// tx_domain_dist_level.
#define TX_DOMAIN_DIST_LEVELS 4

// Transform domain distortion type to be used for default, mode and winner
// mode evaluation. One row per level (TX_DOMAIN_DIST_LEVELS rows); within a
// row:
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable (Eg : IntraBc).
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when the
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON.
static unsigned int
    tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
      { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
    };
74
// Threshold values to be used for disabling coeff RD-optimization
// based on block MSE / qstep^2.
// TODO(any): Experiment the threshold logic based on variance metric.
// The innermost pair holds the two threshold values:
//   Index 0: dist threshold.
//   Index 1: satd threshold.
// For each row, the middle indices are as follows.
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable (Eg : IntraBc).
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when the enable_winner_mode_for_coeff_opt
// speed feature is ON.
// There are 9 levels with increasing speed, mapping to vertical indices.
static unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
  { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
  { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
  { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
  { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
  { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
};
98
// Transform size search method to be used for default, mode and winner mode
// evaluation. Within each of the four rows:
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable (Eg : IntraBc).
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when the enable_winner_mode_for_tx_size_srch
// speed feature is ON.
static TX_SIZE_SEARCH_METHOD tx_size_search_methods[4][MODE_EVAL_TYPES] = {
  { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
  { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
  { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD },
  { USE_LARGESTALL, USE_LARGESTALL, USE_LARGESTALL }
};
110
// Predict transform skip levels to be used for default, mode and winner mode
// evaluation. Within each of the three rows:
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable.
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Values indicate the aggressiveness of skip flag prediction.
//   0 : no early skip prediction
//   1 : conservative early skip prediction using DCT_DCT
//   2 : early skip prediction based on SSE
static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
                                                                { 1, 1, 1 },
                                                                { 1, 2, 1 } };
121
// Predict skip or DC block level used during transform type search. It is
// indexed using the following:
// First index  : Speed feature 'dc_blk_pred_level' (0 to 3)
// Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and
//                WINNER_MODE_EVAL).
//
// The values of predict_dc_levels[][] indicate the aggressiveness of
// predicting a block as transform skip or DC only.
// Type 0 : No skip block or DC only block prediction
// Type 1 : Prediction of skip block based on residual mean and variance
// Type 2 : Prediction of skip block or DC only block based on residual mean
//          and variance
static unsigned int predict_dc_levels[4][MODE_EVAL_TYPES] = {
  { 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
};
137
#if !CONFIG_FPMT_TEST
// This table holds the maximum number of reference frames for global motion.
// The table is indexed as per the speed feature 'gm_search_type' and has
// GM_DISABLE_SEARCH + 1 entries.
// 0 : All reference frames are allowed.
// 1 : All reference frames except L2 and L3 are allowed.
// 2 : All reference frames except L2, L3 and ARF2 are allowed.
// 3 : No reference frame is allowed.
// The table is only compiled when CONFIG_FPMT_TEST is 0.
static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
  INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
};
#endif
149
// Qindex threshold levels used for selecting full-pel motion search.
// ms_qindex_thresh[i][j][k] indicates the qindex boundary value for 'k'th
// qindex band for resolution index 'j' for aggressiveness level 'i'.
// Aggressiveness increases from i = 0 to 2.
// j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
// Currently invoked only for speed 0, 1 and 2.
static int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
                                         { { 170, 50 }, { MAXQ, 200 } },
                                         { { 170, 40 }, { 200, 40 } } };
159
// Full-pel search methods for aggressive search based on qindex.
//   Index 0: resolutions lower than 720p (CLAMPED_DIAMOND).
//   Index 1: 720p or larger resolutions (DIAMOND).
// Currently invoked only for speed 1 and 2.
static SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND, DIAMOND };
164
165 // Intra only frames, golden frames (except alt ref overlays) and
166 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)167 static int frame_is_boosted(const AV1_COMP *cpi) {
168 return frame_is_kf_gf_arf(cpi);
169 }
170
171 // Set transform rd gate level for all transform search cases.
set_txfm_rd_gate_level(int txfm_rd_gate_level[TX_SEARCH_CASES],int level)172 static AOM_INLINE void set_txfm_rd_gate_level(
173 int txfm_rd_gate_level[TX_SEARCH_CASES], int level) {
174 assert(level <= MAX_TX_RD_GATE_LEVEL);
175 for (int idx = 0; idx < TX_SEARCH_CASES; idx++)
176 txfm_rd_gate_level[idx] = level;
177 }
178
// Sets the all-intra speed features whose values depend on the frame size.
//
// The resolution class is taken from the smaller of the frame width and
// height. Baseline values are written first; every `speed >= N` section after
// that is cumulative and may overwrite values from an earlier section, so the
// sections must stay in increasing speed order.
//
//   cpi   : encoder instance; only the frame dimensions and the
//           high-bitdepth flag are read here.
//   sf    : speed feature set that is updated in place.
//   speed : requested speed level.
static void set_allintra_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_dim >= 480;
  const int is_720p_or_larger = min_dim >= 720;
  const int is_1080p_or_larger = min_dim >= 1080;
  const int is_4k_or_larger = min_dim >= 2160;
  const bool use_hbd = cpi->oxcf.use_highbitdepth;

  // Baseline (all speeds).
  if (!is_480p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    sf->part_sf.auto_max_partition_based_on_simple_motion =
        is_720p_or_larger ? ADAPT_PRED : RELAXED_PRED;
  }

  if (is_4k_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;

  if (is_720p_or_larger) {
    // TODO(chiyotsai@google.com): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 2;
  } else {
    // TODO(huisu@google.com): train models for 720P and above.
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  if (speed >= 1) {
    if (is_720p_or_larger) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    } else {
      sf->part_sf.use_square_partition_only_threshold =
          is_480p_or_larger ? BLOCK_64X64 : BLOCK_32X32;

      // Below 720p the 64X64 breakout threshold drops from 500 to 300.
      sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
      sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
      sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
      sf->part_sf.ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
      sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
    }
    sf->part_sf.ml_early_term_after_part_split_level = 2;
  }

  if (speed >= 2) {
    // Everything below 720p uses the same 32X32 threshold at this speed.
    sf->part_sf.use_square_partition_only_threshold =
        is_720p_or_larger ? BLOCK_64X64 : BLOCK_32X32;

    sf->part_sf.partition_search_breakout_dist_thr =
        is_720p_or_larger ? (1 << 24) : (1 << 22);
    sf->part_sf.partition_search_breakout_rate_thr =
        is_720p_or_larger ? 120 : 100;

    if (is_480p_or_larger) sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
    if (use_hbd) sf->tx_sf.prune_tx_size_level = is_480p_or_larger ? 2 : 3;
  }

  if (speed >= 3) {
    sf->part_sf.ml_early_term_after_part_split_level = 0;

    if (!is_720p_or_larger) sf->part_sf.max_intra_bsize = BLOCK_32X32;
    sf->part_sf.partition_search_breakout_dist_thr =
        is_720p_or_larger ? (1 << 25) : (1 << 23);
    sf->part_sf.partition_search_breakout_rate_thr =
        is_720p_or_larger ? 200 : 120;

    if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
  }

  if (speed >= 4) {
    sf->part_sf.partition_search_breakout_dist_thr =
        is_720p_or_larger ? (1 << 26) : (1 << 24);

    if (is_480p_or_larger) sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
  }

  if (speed >= 6) {
    // Frames below 480p keep the value chosen earlier.
    if (is_480p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion =
          is_720p_or_larger ? NOT_IN_USE : DIRECT_PRED;
    }

    if (is_1080p_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;

    sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
  }

  // speed >= 7: no frame-size dependent settings yet.
  // TODO(kyslov): add more speed features to control speed/quality

  if (speed >= 8) {
    if (!is_480p_or_larger) sf->rt_sf.nonrd_check_partition_merge_mode = 2;
    if (is_720p_or_larger) sf->rt_sf.force_large_partition_blocks_intra = 1;
  }

  // TODO(kyslov): add more speed features to control speed/quality
  if (speed >= 9 && !is_4k_or_larger) {
    // In av1_select_sb_size(), superblock size is set to 64x64 only for
    // resolutions less than 4k in speed>=9, to improve the multithread
    // performance. If cost update levels are set to INTERNAL_COST_UPD_OFF
    // for resolutions >= 4k, the SB size setting can be modified for these
    // resolutions as well.
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_OFF;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_OFF;
  }
}
330
// Sets the all-intra speed features that do not depend on the frame size.
//
// Baseline values are assigned first. Each following `speed >= N` block is
// cumulative: it runs for every speed at or above N and may override values
// set by an earlier block, so the order of the blocks matters.
//
//   cpi   : encoder instance; read for screen content flags, content type
//           and the high-bitdepth flag.
//   sf    : speed feature set that is updated in place.
//   speed : requested speed level.
static void set_allintra_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;

  // Baseline settings, applied at every speed.
  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;
  sf->intra_sf.prune_luma_palette_size_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation or screen content gets a lower threshold (1 << 20)
  // than other content (1 << 25).
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  if (speed >= 1) {
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;
    sf->part_sf.reuse_best_prediction_for_part_ab = 1;

    // Doubles the threshold chosen in the baseline section above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;

    sf->intra_sf.prune_palette_search_level = 1;
    sf->intra_sf.prune_luma_palette_size_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 3;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = 2;
    sf->rd_sf.tx_domain_dist_level = 1;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;
  }

  if (speed >= 2) {
    sf->mv_sf.auto_mv_step_size = 1;

    sf->intra_sf.disable_smooth_intra = 1;
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.prune_filter_intra_level = 1;

    sf->rd_sf.perform_coeff_opt = 3;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL1;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.search_method = DIAMOND;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    // Same value as already set in the speed >= 1 block.
    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.use_rd_based_breakout_for_intra_tx_search = true;

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = 0;
    sf->lpf_sf.reduce_wiener_window_size = 1;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    sf->part_sf.ml_predict_breakout_level = 3;

    // Setting this causes chroma_intra_pruning_with_hog to be reset to 0 at
    // the end of this function.
    sf->intra_sf.prune_chroma_modes_using_luma_winner = 1;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;

    sf->rd_sf.perform_coeff_opt = 5;
    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;

    sf->mv_sf.reduce_search_range = 1;

    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  }

  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 1 : 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    sf->lpf_sf.use_coarse_filter_level_search = 0;
    // Disable Wiener and Self-guided Loop restoration filters.
    sf->lpf_sf.disable_wiener_filter = true;
    sf->lpf_sf.disable_sgr_filter = true;

    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
  }

  if (speed >= 6) {
    sf->intra_sf.prune_smooth_intra_mode_for_chroma = 1;
    sf->intra_sf.prune_filter_intra_level = 2;
    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    sf->intra_sf.cfl_search_range = 1;
    sf->intra_sf.top_intra_model_count_allowed = 2;
    sf->intra_sf.adapt_top_model_rd_count_using_neighbors = 1;
    sf->intra_sf.prune_luma_odd_delta_angles_in_intra = 1;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.prune_rect_part_using_4x4_var_deviation = true;
    sf->part_sf.prune_rect_part_using_none_pred_mode = true;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 0 : 1;
    sf->part_sf.prune_part4_search = 3;
    // TODO(jingning): This might not be a good trade off if the
    // target image quality is very low.
    sf->part_sf.default_max_partition_size = BLOCK_32X32;

    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
    // Overrides the value of 1 set in the speed >= 4 block.
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
    sf->tx_sf.prune_intra_tx_depths_using_nn = true;

    sf->rd_sf.perform_coeff_opt = 6;
    sf->rd_sf.tx_domain_dist_level = 3;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
    sf->winner_mode_sf.dc_blk_pred_level = 1;
  }
  // The following should make all-intra mode speed 7 approximately equal
  // to real-time speed 6,
  // all-intra speed 8 close to real-time speed 7, and all-intra speed 9
  // close to real-time speed 8
  if (speed >= 7) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
    // OR-ed in, so flags already present in mode_search_skip_flags are kept.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.var_part_split_threshold_shift = 7;
  }

  if (speed >= 8) {
    sf->rt_sf.hybrid_intra_pickmode = 1;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 1;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      if (i >= BLOCK_32X32)
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      else
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
  }

  if (speed >= 9) {
    sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;

    // These two reset the values of 1 assigned in the speed >= 8 block.
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.hybrid_intra_pickmode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
    sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
    sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
    sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true;
  }

  // As the speed feature prune_chroma_modes_using_luma_winner already
  // constrains the number of chroma directional mode evaluations to a maximum
  // of 1, the HOG computation and the associated pruning logic does not seem to
  // help speed-up the chroma mode evaluations. Hence disable the speed feature
  // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is
  // enabled.
  if (sf->intra_sf.prune_chroma_modes_using_luma_winner)
    sf->intra_sf.chroma_intra_pruning_with_hog = 0;
}
589
set_good_speed_feature_framesize_dependent(const AV1_COMP * const cpi,SPEED_FEATURES * const sf,int speed)590 static void set_good_speed_feature_framesize_dependent(
591 const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
592 const AV1_COMMON *const cm = &cpi->common;
593 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
594 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
595 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
596 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
597 const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
598 const bool use_hbd = cpi->oxcf.use_highbitdepth;
599 // Speed features applicable for temporal filtering and tpl modules may be
600 // changed based on frame type at places where the sf is applied (Example :
601 // use_downsampled_sad). This is because temporal filtering and tpl modules
602 // are called before this function (except for the first key frame).
603 // TODO(deepa.kg@ittiam.com): For the speed features applicable to temporal
604 // filtering and tpl modules, modify the sf initialization appropriately
605 // before calling the modules.
606 const int boosted = frame_is_boosted(cpi);
607 const int is_boosted_arf2_bwd_type =
608 boosted ||
609 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
610 const int is_lf_frame =
611 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == LF_UPDATE;
612 const int allow_screen_content_tools =
613 cm->features.allow_screen_content_tools;
614
615 if (is_480p_or_larger) {
616 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
617 if (is_720p_or_larger)
618 sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED;
619 else
620 sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
621 } else {
622 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
623 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
624 if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
625 }
626
627 if (is_4k_or_larger) {
628 sf->part_sf.default_min_partition_size = BLOCK_8X8;
629 }
630
631 // TODO(huisu@google.com): train models for 720P and above.
632 if (!is_720p_or_larger) {
633 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
634 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
635 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
636 sf->part_sf.ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64
637 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
638 sf->part_sf.ml_early_term_after_part_split_level = 1;
639 }
640
641 if (is_720p_or_larger) {
642 // TODO(chiyotsai@google.com): make this speed feature adaptive based on
643 // current block's vertical texture instead of hardcoded with resolution
644 sf->mv_sf.use_downsampled_sad = 2;
645 }
646
647 if (!is_720p_or_larger) {
648 const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
649 const int rate_tolerance =
650 AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct);
651 sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2);
652 }
653
654 if (speed >= 1) {
655 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1;
656
657 if (is_720p_or_larger) {
658 sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
659 } else if (is_480p_or_larger) {
660 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
661 } else {
662 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
663 }
664
665 if (!is_720p_or_larger) {
666 sf->part_sf.ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8
667 sf->part_sf.ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16
668 sf->part_sf.ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32
669 sf->part_sf.ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64
670 sf->part_sf.ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
671 }
672 sf->part_sf.ml_early_term_after_part_split_level = 2;
673
674 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
675 }
676
677 if (speed >= 2) {
678 if (is_720p_or_larger) {
679 sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
680 } else if (is_480p_or_larger) {
681 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
682 } else {
683 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
684 }
685
686 if (is_720p_or_larger) {
687 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
688 sf->part_sf.partition_search_breakout_rate_thr = 120;
689 } else {
690 sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
691 sf->part_sf.partition_search_breakout_rate_thr = 100;
692 }
693
694 if (is_720p_or_larger) {
695 sf->inter_sf.prune_obmc_prob_thresh = 16;
696 } else {
697 sf->inter_sf.prune_obmc_prob_thresh = 8;
698 }
699
700 if (is_480p_or_larger) {
701 sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
702 } else {
703 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
704 }
705
706 if (is_480p_or_lesser) sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
707
708 if (is_720p_or_larger) {
709 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 1 : 0;
710 } else {
711 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 2 : 0;
712 }
713
714 if (is_480p_or_larger) {
715 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
716 if (use_hbd) sf->tx_sf.prune_tx_size_level = 2;
717 } else {
718 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
719 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = boosted ? 0 : 1;
720 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = boosted ? 0 : 1;
721 }
722
723 if (!is_720p_or_larger) {
724 sf->mv_sf.disable_second_mv = 1;
725 sf->mv_sf.auto_mv_step_size = 2;
726 } else {
727 sf->mv_sf.disable_second_mv = boosted ? 0 : 2;
728 sf->mv_sf.auto_mv_step_size = 1;
729 }
730
731 if (!is_720p_or_larger) {
732 sf->hl_sf.recode_tolerance = 50;
733 sf->inter_sf.disable_interinter_wedge_newmv_search =
734 is_boosted_arf2_bwd_type ? 0 : 1;
735 sf->inter_sf.enable_fast_wedge_mask_search = 1;
736 }
737 }
738
739 if (speed >= 3) {
740 sf->inter_sf.enable_fast_wedge_mask_search = 1;
741 sf->inter_sf.skip_newmv_in_drl = 2;
742 sf->inter_sf.skip_ext_comp_nearmv_mode = 1;
743 sf->inter_sf.limit_inter_mode_cands = is_lf_frame ? 3 : 0;
744 sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
745 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
746 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch =
747 frame_is_intra_only(&cpi->common) ? 0 : 1;
748
749 sf->part_sf.ml_early_term_after_part_split_level = 0;
750
751 if (is_720p_or_larger) {
752 sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
753 sf->part_sf.partition_search_breakout_rate_thr = 200;
754 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 2 : 0;
755 } else {
756 sf->part_sf.max_intra_bsize = BLOCK_32X32;
757 sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
758 sf->part_sf.partition_search_breakout_rate_thr = 120;
759 sf->part_sf.skip_non_sq_part_based_on_none = is_lf_frame ? 1 : 0;
760 }
761 if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
762
763 if (is_480p_or_larger) {
764 sf->part_sf.early_term_after_none_split = 1;
765 } else {
766 sf->part_sf.early_term_after_none_split = 0;
767 }
768 if (is_720p_or_larger) {
769 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 2;
770 } else {
771 sf->intra_sf.skip_intra_in_interframe = boosted ? 1 : 3;
772 }
773
774 if (is_720p_or_larger) {
775 sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
776 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 1;
777 } else {
778 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
779 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
780 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL2;
781 }
782
783 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
784 }
785
786 if (speed >= 4) {
787 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
788 sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
789 if (is_720p_or_larger) {
790 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
791 } else {
792 sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
793 }
794 sf->part_sf.early_term_after_none_split = 1;
795
796 if (is_480p_or_larger) {
797 sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
798 } else {
799 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
800 }
801
802 sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
803 sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
804 sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
805 if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3;
806
807 if (is_720p_or_larger) {
808 sf->inter_sf.prune_comp_ref_frames = 1;
809 } else if (is_480p_or_larger) {
810 sf->inter_sf.prune_comp_ref_frames = is_boosted_arf2_bwd_type ? 0 : 1;
811 }
812
813 if (is_720p_or_larger)
814 sf->hl_sf.recode_tolerance = 32;
815 else
816 sf->hl_sf.recode_tolerance = 55;
817
818 sf->intra_sf.skip_intra_in_interframe = 4;
819
820 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;
821 }
822
823 if (speed >= 5) {
824 if (is_720p_or_larger) {
825 sf->inter_sf.prune_warped_prob_thresh = 16;
826 } else if (is_480p_or_larger) {
827 sf->inter_sf.prune_warped_prob_thresh = 8;
828 }
829 if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40;
830
831 sf->inter_sf.skip_newmv_in_drl = 4;
832 sf->inter_sf.prune_comp_ref_frames = 1;
833 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 1;
834
835 if (!is_720p_or_larger) {
836 sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
837 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
838 (boosted || allow_screen_content_tools) ? 0 : 1;
839 sf->mv_sf.use_downsampled_sad = 1;
840 }
841
842 if (!is_480p_or_larger) {
843 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
844 }
845
846 if (is_480p_or_lesser) {
847 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL1;
848 } else {
849 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL2;
850 }
851
852 if (is_720p_or_larger)
853 sf->part_sf.ext_part_eval_based_on_cur_best =
854 (allow_screen_content_tools || frame_is_intra_only(cm)) ? 0 : 1;
855
856 if (is_480p_or_larger) {
857 sf->tpl_sf.reduce_num_frames = 1;
858 }
859 }
860
861 if (speed >= 6) {
862 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
863 sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
864 sf->inter_sf.prune_comp_ref_frames = 2;
865 sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
866 (boosted || allow_screen_content_tools) ? 0 : 1;
867 sf->mv_sf.skip_fullpel_search_using_startmv = boosted ? 0 : 2;
868
869 if (is_720p_or_larger) {
870 sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
871 } else if (is_480p_or_larger) {
872 sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
873 }
874
875 if (is_480p_or_larger) {
876 sf->hl_sf.allow_sub_blk_me_in_tf = 1;
877 }
878
879 if (is_1080p_or_larger) {
880 sf->part_sf.default_min_partition_size = BLOCK_8X8;
881 }
882
883 if (is_720p_or_larger) {
884 sf->inter_sf.disable_masked_comp = 1;
885 }
886
887 if (!is_720p_or_larger) {
888 sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
889 sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;
890 }
891
892 if (is_720p_or_larger) {
893 sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
894 sf->part_sf.partition_search_breakout_dist_thr = (1 << 28);
895 } else {
896 sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
897 sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
898 }
899
900 if (is_720p_or_larger) {
901 sf->inter_sf.prune_ref_mv_idx_search = 2;
902 } else {
903 sf->inter_sf.prune_ref_mv_idx_search = 1;
904 }
905
906 if (!is_720p_or_larger) {
907 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
908 is_boosted_arf2_bwd_type ? 450 : 150;
909 }
910
911 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
912
913 sf->hl_sf.recode_tolerance = 55;
914 }
915 }
916
// Sets the speed features that do not depend on the frame dimensions
// (judging by the function name, for the "good" quality encoding path).
//
// A baseline group of settings is applied unconditionally, followed by one
// section per speed threshold (speed >= 1 through speed >= 6). The sections
// are cumulative: every section whose threshold is met runs, in order, so a
// later section may overwrite a value written by an earlier one. Statement
// order is therefore significant.
//
//   cpi:   encoder instance; only read here (frame/GF-group state and config).
//   sf:    speed feature set that is updated in place.
//   speed: speed preset; compared against the thresholds 1..6.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int boosted = frame_is_boosted(cpi);
  // True for boosted frames and for frames whose GF-group update type is
  // INTNL_ARF_UPDATE.
  const int is_boosted_arf2_bwd_type =
      boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
  // True when the GF group marks the current frame as INTER_FRAME.
  const int is_inter_frame =
      gf_group->frame_type[cpi->gf_frame_index] == INTER_FRAME;
  const int allow_screen_content_tools =
      cm->features.allow_screen_content_tools;
  const int use_hbd = cpi->oxcf.use_highbitdepth;
  // Only assigned when large-scale tile mode is off; otherwise the field keeps
  // the value it had on entry.
  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->gm_sf.gm_search_type = boosted ? GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2
                                     : GM_SEARCH_CLOSEST_REFS_ONLY;
  sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
  sf->gm_sf.disable_gm_search_based_on_stats = 1;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.simple_motion_search_prune_agg =
      allow_screen_content_tools ? NO_PRUNING : SIMPLE_AGG_LVL0;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  // 0 for boosted frames or screen content, 1 for internal ARF updates,
  // 2 for everything else.
  sf->inter_sf.prune_ref_frame_for_rect_partitions =
      (boosted || (allow_screen_content_tools))
          ? 0
          : (is_boosted_arf2_bwd_type ? 1 : 2);
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation content, or a non-zero use_screen_content_tools, gets
  // the smaller of the two exhaustive-search thresholds.
  if (cpi->twopass_frame.fr_content_type == FC_GRAPHICS_ANIMATION ||
      cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  }

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  // ---- Speed 1 and above ----
  if (speed >= 1) {
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 1;

    sf->part_sf.intra_cnn_based_part_prune_level =
        allow_screen_content_tools ? 0 : 2;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
    // Raises the high-bitdepth level from the baseline 1 to 2; the
    // non-high-bitdepth level stays at 3.
    sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;

    // Doubles the threshold selected in the baseline section above.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1;
    // Replaces the baseline value: the 0 case now keys on intra-only frames
    // (rather than boosted frames), and boosted frames get 1.
    sf->inter_sf.prune_ref_frame_for_rect_partitions =
        (frame_is_intra_only(&cpi->common) || (allow_screen_content_tools))
            ? 0
            : (boosted ? 1 : 2);
    sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_arf_compound = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    // Resets the baseline value of 1.
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  // ---- Speed 2 and above ----
  if (speed >= 2) {
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->fp_sf.skip_motion_search_threshold = 25;

    sf->gm_sf.num_refinement_steps = 2;

    // Enabled (1) for every frame that is not intra-only.
    sf->part_sf.reuse_best_prediction_for_part_ab =
        !frame_is_intra_only(&cpi->common);

    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_iters_per_step = 1;
    sf->mv_sf.reduce_search_range = 1;

    // TODO(chiyotsai@google.com): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
    sf->inter_sf.prune_ext_comp_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    sf->inter_sf.enable_fast_compound_mode_search = 1;
    sf->inter_sf.reuse_mask_search_results = 1;
    // NOTE(review): set_txfm_rd_gate_level() is defined outside this block;
    // presumably it applies the given level to the entries of the
    // txfm_rd_gate_level array - confirm against its definition.
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 1);
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
    sf->inter_sf.alt_ref_search_fp = 1;

    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    // Evaluates to 0 only for an intra-only frame with frames_to_key <= 1;
    // it is 1 in every other case.
    sf->intra_sf.disable_smooth_intra =
        !frame_is_intra_only(&cpi->common) || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;
    sf->intra_sf.skip_intra_in_interframe = is_inter_frame ? 2 : 1;
    sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

    sf->tpl_sf.prune_starting_mv = 1;
    // Replaces the baseline NSTEP_8PT.
    sf->tpl_sf.search_method = DIAMOND;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 3 : 4;
    sf->rd_sf.use_mb_rd_hash = 1;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;
    sf->lpf_sf.disable_loop_restoration_chroma = boosted ? 0 : 1;
    sf->lpf_sf.reduce_wiener_window_size = boosted ? 0 : 1;

    // TODO(any): Re-evaluate this feature set to 1 in speed 2.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  // ---- Speed 3 and above ----
  if (speed >= 3) {
    // Assigned unconditionally here, unlike the LAST_MV_DATA assignment at the
    // top of the function, which is skipped in large-scale tile mode.
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;

    sf->gm_sf.prune_ref_frame_for_gm_search = 1;
    sf->gm_sf.prune_zero_mv_with_sse = 1;
    // Overrides the 2 set at speed >= 2.
    sf->gm_sf.num_refinement_steps = 0;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools
            ? SIMPLE_AGG_LVL0
            : (boosted ? SIMPLE_AGG_LVL1 : QIDX_BASED_AGG_LVL1);
    sf->part_sf.prune_ext_part_using_split_info = 1;
    sf->part_sf.simple_motion_search_rect_split = 1;

    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;
    sf->mv_sf.disable_second_mv = 2;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_1;
    sf->mv_sf.use_intrabc = 0;

    sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    sf->inter_sf.disable_onesided_comp = 1;
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.reuse_compound_type_decision = 1;
    // Level passed: 0 for boosted frames, 1 for internal ARF updates,
    // 2 otherwise.
    set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level,
                           boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2));
    sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;

    sf->interp_sf.adaptive_interp_filter_search = 2;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;
    sf->intra_sf.top_intra_model_count_allowed = 2;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.gop_length_decision_method = 1;

    sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    // 0 for boosted frames, 1 for internal ARF updates, 2 otherwise.
    sf->winner_mode_sf.motion_mode_for_winner_cand =
        boosted ? 0
        : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE ? 1
                                                                         : 2;
    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 4;

    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
    sf->lpf_sf.use_coarse_filter_level_search =
        frame_is_intra_only(&cpi->common) ? 0 : 1;
    sf->lpf_sf.use_downsampled_wiener_stats = 1;
  }

  // ---- Speed 4 and above ----
  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->gm_sf.prune_zero_mv_with_sse = 2;
    sf->gm_sf.downsample_level = 1;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    // From here on the level is 3 regardless of bit depth.
    sf->part_sf.ml_predict_breakout_level = 3;
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (allow_screen_content_tools || frame_is_intra_only(&cpi->common)) ? 0
                                                                          : 1;

    sf->inter_sf.alt_ref_search_fp = 2;
    // Per-entry gate levels; all three stay 0 for boosted frames.
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 3;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_MOTION_MODE] = boosted ? 0 : 5;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 3;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    // Overrides the threshold of 100 set at speed >= 2.
    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;

    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4.
    // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    // Replaces the frame-type-dependent value (2 or 1) chosen at speed >= 2.
    sf->intra_sf.skip_intra_in_interframe = 4;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
    sf->mv_sf.prune_mesh_search = PRUNE_MESH_SEARCH_LVL_2;

    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;
    sf->tpl_sf.use_sad_for_mode_decision = 1;

    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 5 : 7;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_DEFAULT
                                          : MULTI_WINNER_MODE_OFF;
    sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 2;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
  }

  // ---- Speed 5 and above ----
  if (speed >= 5) {
    sf->hl_sf.weight_calc_level_in_tf = 1;
    sf->hl_sf.adjust_num_frames_for_arf_filtering =
        allow_screen_content_tools ? 0 : 2;

    sf->fp_sf.reduce_mv_step_param = 4;

    // Replaces the boosted-dependent search type chosen in the baseline.
    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;

    sf->part_sf.simple_motion_search_prune_agg =
        allow_screen_content_tools ? SIMPLE_AGG_LVL0 : SIMPLE_AGG_LVL3;
    sf->part_sf.ext_partition_eval_thresh =
        allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
    sf->part_sf.prune_sub_8x8_partition_level =
        allow_screen_content_tools ? 1 : 2;

    sf->mv_sf.warp_search_method = WARP_SEARCH_DIAMOND;

    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 1;
    // Only two of the three entries written at speed >= 4 are raised here;
    // TX_SEARCH_MOTION_MODE keeps its speed-4 value.
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_DEFAULT] = boosted ? 0 : 4;
    sf->inter_sf.txfm_rd_gate_level[TX_SEARCH_COMP_TYPE_MODE] = boosted ? 0 : 5;
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->interp_sf.skip_interp_filter_search = boosted ? 0 : 1;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    sf->winner_mode_sf.multi_winner_mode_type =
        frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_FAST
                                          : MULTI_WINNER_MODE_OFF;

    // Disable Self-guided Loop restoration filter.
    sf->lpf_sf.disable_sgr_filter = true;
    sf->lpf_sf.disable_wiener_coeff_refine_search = true;

    sf->tpl_sf.prune_starting_mv = 3;
    sf->tpl_sf.use_y_only_rate_distortion = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.gop_length_decision_method = 2;
    sf->tpl_sf.use_sad_for_mode_decision = 2;

    // Unlike speed 4 (boosted ? 0 : 2), level 2 now applies to boosted frames
    // as well.
    sf->winner_mode_sf.dc_blk_pred_level = 2;

    sf->fp_sf.disable_recon = 1;
  }

  // ---- Speed 6 and above ----
  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;

    sf->gm_sf.downsample_level = 2;

    sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
    sf->inter_sf.selective_ref_frame = 6;
    sf->inter_sf.prune_single_ref = is_boosted_arf2_bwd_type ? 0 : 2;
    sf->inter_sf.prune_ext_comp_using_neighbors = 3;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;
    // The 32x32 and 64x64 UV masks are narrowed from the speed-4 value; the
    // 16x16 UV mask is left as set at speed >= 4.
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_sf.early_term_chroma_palette_size_search = 1;

    // "||" binds tighter than "?:", so this reads as
    // (boosted || allow_screen_content_tools) ? 0 : 2.
    sf->part_sf.prune_rectangular_split_based_on_qidx =
        boosted || allow_screen_content_tools ? 0 : 2;

    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tpl_sf.gop_length_decision_method = 3;

    sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 3;
    // Turns multi-winner mode off for all frames, including the intra-only
    // case that speeds 4 and 5 enabled.
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->fp_sf.skip_zeromv_motion_search = 1;
  }
}
1319
set_rt_speed_feature_framesize_dependent(const AV1_COMP * const cpi,SPEED_FEATURES * const sf,int speed)1320 static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
1321 SPEED_FEATURES *const sf,
1322 int speed) {
1323 const AV1_COMMON *const cm = &cpi->common;
1324 const int boosted = frame_is_boosted(cpi);
1325 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
1326 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
1327 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
1328 const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
1329
1330 if (!is_360p_or_larger) {
1331 sf->rt_sf.prune_intra_mode_based_on_mv_range = 1;
1332 sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
1333 if (speed >= 6)
1334 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
1335 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 2;
1336 if (speed >= 7) {
1337 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1338 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
1339 sf->rt_sf.use_rtc_tf = 2;
1340 }
1341 if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1;
1342 if (speed >= 8) {
1343 sf->rt_sf.use_nonrd_filter_search = 1;
1344 sf->rt_sf.tx_size_level_based_on_qstep = 1;
1345 }
1346 if (speed >= 9) {
1347 sf->rt_sf.use_comp_ref_nonrd = 0;
1348 sf->rt_sf.nonrd_aggressive_skip = 1;
1349 sf->rt_sf.skip_intra_pred = 1;
1350 // Only turn on enable_ref_short_signaling for low resolution when only
1351 // LAST and GOLDEN ref frames are used.
1352 sf->rt_sf.enable_ref_short_signaling =
1353 (!sf->rt_sf.use_nonrd_altref_frame &&
1354 (!sf->rt_sf.use_comp_ref_nonrd ||
1355 (!sf->rt_sf.ref_frame_comp_nonrd[1] &&
1356 !sf->rt_sf.ref_frame_comp_nonrd[2])));
1357
1358 // TODO(kyslov) Re-enable when AV1 models are trained
1359 #if 0
1360 #if CONFIG_RT_ML_PARTITIONING
1361 if (!frame_is_intra_only(cm)) {
1362 sf->part_sf.partition_search_type = ML_BASED_PARTITION;
1363 sf->rt_sf.reuse_inter_pred_nonrd = 0;
1364 }
1365 #endif
1366 #endif
1367 sf->rt_sf.use_adaptive_subpel_search = false;
1368 }
1369 if (speed >= 10) {
1370 // TODO(yunqingwang@google.com): To be conservative, disable
1371 // sf->rt_sf.estimate_motion_for_var_based_partition = 3 for speed 10/qvga
1372 // for now. May enable it in the future.
1373 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1374 sf->rt_sf.skip_intra_pred = 2;
1375 sf->rt_sf.hybrid_intra_pickmode = 3;
1376 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
1377 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1378 sf->rt_sf.use_nonrd_filter_search = 0;
1379 }
1380 } else {
1381 sf->rt_sf.prune_intra_mode_based_on_mv_range = 2;
1382 sf->intra_sf.skip_filter_intra_in_inter_frames = 1;
1383 if (speed <= 5) {
1384 sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
1385 boosted ? INT_MAX : 350;
1386 sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
1387 }
1388 if (speed == 6) sf->part_sf.disable_8x8_part_based_on_qidx = 1;
1389 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 2;
1390 if (speed == 7) {
1391 sf->rt_sf.prefer_large_partition_blocks = 1;
1392 // Enable this feature for [360p, 720p] resolution range initially.
1393 // Only enable for low bitdepth to mitigate issue: b/303023614.
1394 if (!cpi->rc.rtc_external_ratectrl &&
1395 AOMMIN(cm->width, cm->height) <= 720 && !cpi->oxcf.use_highbitdepth)
1396 sf->hl_sf.accurate_bit_estimate = cpi->oxcf.q_cfg.aq_mode == NO_AQ;
1397 }
1398 if (speed >= 7) {
1399 sf->rt_sf.use_rtc_tf = 1;
1400 }
1401 if (speed == 8 && !cpi->ppi->use_svc) {
1402 sf->rt_sf.short_circuit_low_temp_var = 0;
1403 sf->rt_sf.use_nonrd_altref_frame = 1;
1404 }
1405 if (speed >= 8) sf->rt_sf.tx_size_level_based_on_qstep = 2;
1406 if (speed >= 9) {
1407 sf->rt_sf.gf_length_lvl = 1;
1408 sf->rt_sf.skip_cdef_sb = 1;
1409 sf->rt_sf.sad_based_adp_altref_lag = 2;
1410 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1411 sf->rt_sf.use_adaptive_subpel_search = true;
1412 sf->interp_sf.cb_pred_filter_search = 1;
1413 }
1414 if (speed >= 10) {
1415 sf->rt_sf.hybrid_intra_pickmode = 2;
1416 sf->rt_sf.sad_based_adp_altref_lag = 4;
1417 sf->rt_sf.tx_size_level_based_on_qstep = 0;
1418 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1419 sf->rt_sf.use_adaptive_subpel_search = false;
1420 sf->interp_sf.cb_pred_filter_search = 2;
1421 }
1422 }
1423 if (!is_480p_or_larger) {
1424 if (speed == 7) {
1425 sf->rt_sf.nonrd_check_partition_merge_mode = 2;
1426 }
1427 }
1428 if (!is_720p_or_larger) {
1429 if (speed >= 9) {
1430 sf->rt_sf.force_large_partition_blocks_intra = 1;
1431 }
1432 } else {
1433 if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
1434 if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
1435 if (speed >= 7) {
1436 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
1437 sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
1438 }
1439 if (speed >= 9) {
1440 sf->rt_sf.sad_based_adp_altref_lag = 1;
1441 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0;
1442 sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
1443 }
1444 if (speed >= 10) {
1445 sf->rt_sf.sad_based_adp_altref_lag = 3;
1446 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1447 }
1448 }
1449 // TODO(Any): Check/Tune settings of other sfs for 1080p.
1450 if (is_1080p_or_larger) {
1451 if (speed >= 7) {
1452 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
1453 sf->rt_sf.use_adaptive_subpel_search = 0;
1454 }
1455 if (speed >= 9) sf->interp_sf.cb_pred_filter_search = 0;
1456 } else {
1457 if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1458 if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
1459 }
1460 // TODO(marpan): Tune settings for speed 11 video mode,
1461 // for resolutions below 720p.
1462 if (speed >= 11 && !is_720p_or_larger &&
1463 cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
1464 sf->rt_sf.skip_cdef_sb = 2;
1465 sf->rt_sf.force_only_last_ref = 1;
1466 sf->rt_sf.selective_cdf_update = 1;
1467 sf->rt_sf.use_nonrd_filter_search = 0;
1468 if (is_360p_or_larger) {
1469 sf->part_sf.fixed_partition_size = BLOCK_32X32;
1470 sf->rt_sf.use_fast_fixed_part = 1;
1471 sf->mv_sf.subpel_force_stop = HALF_PEL;
1472 }
1473 sf->rt_sf.increase_source_sad_thresh = 1;
1474 sf->rt_sf.part_early_exit_zeromv = 2;
1475 sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
1476 for (int i = 0; i < BLOCK_SIZES; ++i) {
1477 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1478 }
1479 sf->rt_sf.hybrid_intra_pickmode = 0;
1480 }
1481 // Setting for SVC, or when the ref_frame_config control is
1482 // used to set the reference structure.
1483 if (cpi->ppi->use_svc || cpi->ppi->rtc_ref.set_ref_frame_config) {
1484 const RTC_REF *const rtc_ref = &cpi->ppi->rtc_ref;
1485 // For SVC: for greater than 2 temporal layers, use better mv search on
1486 // base temporal layers, and only on base spatial layer if highest
1487 // resolution is above 640x360.
1488 if (cpi->svc.number_temporal_layers >= 2 &&
1489 cpi->svc.temporal_layer_id == 0 &&
1490 (cpi->svc.spatial_layer_id == 0 ||
1491 cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
1492 640 * 360)) {
1493 sf->mv_sf.search_method = NSTEP;
1494 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
1495 sf->rt_sf.fullpel_search_step_param = 10;
1496 sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
1497 if (cm->width * cm->height <= 352 * 288)
1498 sf->rt_sf.nonrd_prune_ref_frame_search = 2;
1499 sf->rt_sf.force_large_partition_blocks_intra = 0;
1500 }
1501 if (speed >= 8) {
1502 if (cpi->svc.number_temporal_layers > 2)
1503 sf->rt_sf.disable_cdf_update_non_reference_frame = true;
1504 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1505 if (rtc_ref->non_reference_frame) {
1506 sf->rt_sf.nonrd_aggressive_skip = 1;
1507 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1508 }
1509 }
1510 if (speed <= 9 && cpi->svc.number_temporal_layers > 2 &&
1511 cpi->svc.temporal_layer_id == 0)
1512 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = false;
1513 else
1514 sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
1515 sf->rt_sf.frame_level_mode_cost_update = false;
1516
1517 // Compound mode enabling.
1518 if (rtc_ref->ref_frame_comp[0] || rtc_ref->ref_frame_comp[1] ||
1519 rtc_ref->ref_frame_comp[2]) {
1520 sf->rt_sf.use_comp_ref_nonrd = 1;
1521 sf->rt_sf.ref_frame_comp_nonrd[0] =
1522 rtc_ref->ref_frame_comp[0] && rtc_ref->reference[GOLDEN_FRAME - 1];
1523 sf->rt_sf.ref_frame_comp_nonrd[1] =
1524 rtc_ref->ref_frame_comp[1] && rtc_ref->reference[LAST2_FRAME - 1];
1525 sf->rt_sf.ref_frame_comp_nonrd[2] =
1526 rtc_ref->ref_frame_comp[2] && rtc_ref->reference[ALTREF_FRAME - 1];
1527 } else {
1528 sf->rt_sf.use_comp_ref_nonrd = 0;
1529 }
1530
1531 if (cpi->svc.number_spatial_layers > 1 ||
1532 cpi->svc.number_temporal_layers > 1)
1533 sf->hl_sf.accurate_bit_estimate = 0;
1534
1535 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1536
1537 // For single layers RPS: bias/adjustment for recovery frame.
1538 if (cpi->ppi->rtc_ref.bias_recovery_frame) {
1539 sf->mv_sf.search_method = NSTEP;
1540 sf->mv_sf.subpel_search_method = SUBPEL_TREE;
1541 sf->rt_sf.fullpel_search_step_param = 8;
1542 sf->rt_sf.nonrd_aggressive_skip = 0;
1543 }
1544 }
1545 // Screen settings.
1546 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
1547 // TODO(marpan): Check settings for speed 7 and 8.
1548 if (speed >= 7) {
1549 sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
1550 sf->mv_sf.use_bsize_dependent_search_method = 0;
1551 sf->rt_sf.skip_cdef_sb = 1;
1552 sf->rt_sf.increase_color_thresh_palette = 1;
1553 if (!frame_is_intra_only(cm)) sf->rt_sf.dct_only_palette_nonrd = 1;
1554 }
1555 if (speed >= 8) {
1556 sf->rt_sf.nonrd_check_partition_merge_mode = 3;
1557 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
1558 sf->rt_sf.use_nonrd_filter_search = 0;
1559 sf->rt_sf.prune_hv_pred_modes_using_src_sad = false;
1560 }
1561 if (speed >= 9) {
1562 sf->rt_sf.prune_idtx_nonrd = 1;
1563 sf->rt_sf.part_early_exit_zeromv = 2;
1564 sf->rt_sf.skip_lf_screen = 1;
1565 sf->rt_sf.nonrd_prune_ref_frame_search = 3;
1566 sf->rt_sf.var_part_split_threshold_shift = 10;
1567 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1568 sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
1569 sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
1570 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
1571 sf->rt_sf.nonrd_check_partition_merge_mode = 0;
1572 sf->interp_sf.cb_pred_filter_search = 0;
1573 }
1574 if (speed >= 10) {
1575 if (cm->width * cm->height > 1920 * 1080)
1576 sf->part_sf.disable_8x8_part_based_on_qidx = 1;
1577 sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
1578 sf->rt_sf.part_early_exit_zeromv = 1;
1579 sf->rt_sf.nonrd_aggressive_skip = 1;
1580 sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90;
1581 }
1582 if (speed >= 11) {
1583 sf->rt_sf.skip_lf_screen = 2;
1584 sf->rt_sf.skip_cdef_sb = 2;
1585 sf->rt_sf.part_early_exit_zeromv = 2;
1586 sf->rt_sf.prune_palette_nonrd = 1;
1587 sf->rt_sf.increase_color_thresh_palette = 0;
1588 }
1589 sf->rt_sf.use_nonrd_altref_frame = 0;
1590 sf->rt_sf.use_rtc_tf = 0;
1591 sf->rt_sf.use_comp_ref_nonrd = 0;
1592 sf->rt_sf.source_metrics_sb_nonrd = 1;
1593 if (cpi->rc.high_source_sad == 1) {
1594 sf->rt_sf.prefer_large_partition_blocks = 0;
1595 sf->part_sf.max_intra_bsize = BLOCK_128X128;
1596 for (int i = 0; i < BLOCK_SIZES; ++i) {
1597 if (i > BLOCK_32X32)
1598 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1599 else
1600 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
1601 }
1602 }
1603 if (cpi->rc.max_block_source_sad > 20000 &&
1604 cpi->rc.frame_source_sad > 100 && speed >= 6 &&
1605 (cpi->rc.percent_blocks_with_motion > 1 ||
1606 cpi->svc.last_layer_dropped[0])) {
1607 sf->mv_sf.search_method = NSTEP;
1608 sf->rt_sf.fullpel_search_step_param = 2;
1609 }
1610 sf->rt_sf.partition_direct_merging = 0;
1611 sf->hl_sf.accurate_bit_estimate = 0;
1612 // This feature is for nonrd_pickmode.
1613 if (sf->rt_sf.use_nonrd_pick_mode)
1614 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1615 else
1616 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1617 }
1618 if (is_lossless_requested(&cpi->oxcf.rc_cfg)) {
1619 sf->rt_sf.use_rtc_tf = 0;
1620 // TODO(aomedia:3412): The setting accurate_bit_estimate = 0
1621 // can be removed once it's fixed for lossless mode.
1622 sf->hl_sf.accurate_bit_estimate = 0;
1623 }
1624 if (cpi->oxcf.use_highbitdepth) {
1625 // Disable for use_highbitdepth = 1 to mitigate issue: b/303023614.
1626 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1627 }
1628 if (cpi->oxcf.superres_cfg.enable_superres) {
1629 sf->rt_sf.use_rtc_tf = 0;
1630 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
1631 }
1632 // rtc_tf feature allocates new source because of possible
1633 // temporal filtering which may change the input source during encoding:
1634 // this causes an issue on resized frames when psnr is calculated,
1635 // so disable it here for frames that are resized (encoding width/height
1636 // different from configured width/height).
1637 if (is_psnr_calc_enabled(cpi) && (cpi->oxcf.frm_dim_cfg.width != cm->width ||
1638 cpi->oxcf.frm_dim_cfg.height != cm->height))
1639 sf->rt_sf.use_rtc_tf = 0;
1640 }
1641
// TODO(kyslov): for now this is very similar to
// set_good_speed_features_framesize_independent, except that it enables the
// non-rd pick mode from speed 7 onward. This function will likely be modified
// in the future with RT-specific speed features.
// Sets the frame-size-independent speed features used for real-time encoding.
//
// A baseline shared by every speed is written first. The `speed >= N`
// sections that follow (N = 6, 7, 8, 9, 10, 11) are applied cumulatively and
// in order, so a later section may override a value chosen by an earlier one;
// the statement order in this function is therefore significant.
// The non-rd pick mode (rt_sf.use_nonrd_pick_mode) is enabled from speed 7.
//
// cpi:   encoder instance supplying the frame type, rate control state,
//        encoder configuration and SVC layer count consulted below.
// sf:    speed feature set that is updated in place.
// speed: requested real-time speed level.
static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
                                                        SPEED_FEATURES *sf,
                                                        int speed) {
  AV1_COMMON *const cm = &cpi->common;
  const int boosted = frame_is_boosted(cpi);

  // Currently, rt speed 0, 1, 2, 3, 4, 5 are the same.
  // Following set of speed features are not impacting encoder's decisions as
  // the relevant tools are disabled by default.
  sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
  sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
  sf->inter_sf.reuse_inter_intra_mode = 1;
  sf->inter_sf.prune_compound_using_single_ref = 0;
  sf->inter_sf.prune_comp_search_by_single_result = 2;
  sf->inter_sf.prune_comp_type_by_comp_avg = 2;
  sf->inter_sf.fast_wedge_sign_estimate = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
  sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
  sf->interp_sf.cb_pred_filter_search = 0;
  sf->interp_sf.skip_interp_filter_search = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 2;
  sf->part_sf.less_rectangular_check_level = 2;
  sf->mv_sf.obmc_full_pixel_search_level = 1;
  sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
  sf->tx_sf.model_based_prune_tx_search_level = 0;
  sf->lpf_sf.dual_sgr_penalty_level = 1;
  // Disable Wiener and Self-guided Loop restoration filters.
  sf->lpf_sf.disable_wiener_filter = true;
  sf->lpf_sf.disable_sgr_filter = true;
  sf->intra_sf.prune_palette_search_level = 2;
  sf->intra_sf.prune_luma_palette_size_search_level = 2;
  sf->intra_sf.early_term_chroma_palette_size_search = 1;

  // End of set

  // TODO(any, yunqing): tune these features for real-time use cases.
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
  sf->hl_sf.frame_parameter_update = 0;

  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  // TODO(any): As per the experiments, this speed feature is doing redundant
  // computation since the model rd based pruning logic is similar to model rd
  // based gating when inter_mode_rd_model_estimation = 2. Enable this SF if
  // either of the condition becomes true.
  //    (1) inter_mode_rd_model_estimation != 2
  //    (2) skip_interp_filter_search == 0
  //    (3) Motion mode or compound mode is enabled
  sf->inter_sf.prune_mode_search_simple_translation = 0;
  sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
  sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
  sf->inter_sf.selective_ref_frame = 4;
  sf->inter_sf.alt_ref_search_fp = 2;
  set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 4);
  sf->inter_sf.limit_txfm_eval_per_mode = 3;

  sf->inter_sf.adaptive_rd_thresh = 4;
  sf->inter_sf.inter_mode_rd_model_estimation = 2;
  sf->inter_sf.prune_inter_modes_if_skippable = 1;
  sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
  sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
  sf->inter_sf.skip_newmv_in_drl = 4;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;
  sf->interp_sf.use_interp_filter = 1;
  sf->interp_sf.adaptive_interp_filter_search = 1;
  sf->interp_sf.disable_dual_filter = 1;

  sf->part_sf.default_max_partition_size = BLOCK_128X128;
  sf->part_sf.default_min_partition_size = BLOCK_8X8;
  sf->part_sf.use_best_rd_for_pruning = 1;
  sf->part_sf.early_term_after_none_split = 1;
  sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
  sf->part_sf.max_intra_bsize = BLOCK_16X16;
  sf->part_sf.partition_search_breakout_rate_thr = 500;
  sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
  sf->part_sf.adjust_var_based_rd_partitioning = 2;

  sf->mv_sf.full_pixel_search_level = 1;
  sf->mv_sf.exhaustive_searches_thresh = INT_MAX;
  sf->mv_sf.auto_mv_step_size = 1;
  sf->mv_sf.subpel_iters_per_step = 1;
  sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
  sf->mv_sf.search_method = FAST_DIAMOND;
  sf->mv_sf.subpel_force_stop = EIGHTH_PEL;
  sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;

  for (int i = 0; i < TX_SIZES; ++i) {
    sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
    sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
  }
  sf->intra_sf.skip_intra_in_interframe = 5;
  sf->intra_sf.disable_smooth_intra = 1;
  sf->intra_sf.skip_filter_intra_in_inter_frames = 1;

  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
  sf->tx_sf.adaptive_txb_search_level = 2;
  sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.tx_size_search_lgr_block = 1;
  sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
  sf->tx_sf.tx_type_search.skip_tx_search = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
  sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
  sf->tx_sf.refine_fast_tx_search_results = 0;
  sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
  sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
  sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;

  sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
  sf->rd_sf.simple_model_rd_from_var = 1;
  sf->rd_sf.tx_domain_dist_level = 2;
  sf->rd_sf.tx_domain_dist_thres_level = 2;

  sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
  sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

  sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 3;
  sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
  sf->winner_mode_sf.tx_size_search_level = 1;
  sf->winner_mode_sf.winner_mode_ifs = 1;

  sf->rt_sf.check_intra_pred_nonrd = 1;
  sf->rt_sf.estimate_motion_for_var_based_partition = 2;
  sf->rt_sf.hybrid_intra_pickmode = 1;
  sf->rt_sf.use_comp_ref_nonrd = 0;
  sf->rt_sf.ref_frame_comp_nonrd[0] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[1] = 0;
  sf->rt_sf.ref_frame_comp_nonrd[2] = 0;
  sf->rt_sf.use_nonrd_filter_search = 1;
  sf->rt_sf.num_inter_modes_for_tx_search = 5;
  sf->rt_sf.prune_inter_modes_using_temp_var = 1;
  sf->rt_sf.use_real_time_ref_set = 1;
  sf->rt_sf.use_simple_rd_model = 1;
  sf->rt_sf.prune_inter_modes_with_golden_ref = boosted ? 0 : 1;
  // TODO(any): This sf could be removed.
  sf->rt_sf.short_circuit_low_temp_var = 1;
  // Scene detection is on unless external rate control is in use.
  sf->rt_sf.check_scene_detection = 1;
  if (cpi->rc.rtc_external_ratectrl) sf->rt_sf.check_scene_detection = 0;
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->oxcf.rc_cfg.mode == AOM_CBR) {
    sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
  }
  // Enable noise estimation only for high resolutions for now.
  //
  // Since use_temporal_noise_estimate has no effect for all-intra frame
  // encoding, it is disabled for this case.
  if (cpi->oxcf.kf_cfg.key_freq_max != 0 &&
      cm->width * cm->height > 640 * 480) {
    sf->rt_sf.use_temporal_noise_estimate = 1;
  }
  sf->rt_sf.skip_tx_no_split_var_based_partition = 1;
  sf->rt_sf.skip_newmv_mode_based_on_sse = 1;
  // Plain assignment: this replaces whatever the flags held before, so no
  // flag needs to be OR-ed in ahead of this point.
  sf->rt_sf.mode_search_skip_flags =
      (cm->current_frame.frame_type == KEY_FRAME)
          ? 0
          : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                FLAG_EARLY_TERMINATE;
  sf->rt_sf.var_part_split_threshold_shift = 5;
  if (!frame_is_intra_only(cm)) sf->rt_sf.var_part_based_on_qidx = 1;
  sf->rt_sf.use_fast_fixed_part = 0;
  sf->rt_sf.increase_source_sad_thresh = 0;

  if (speed >= 6) {
    sf->mv_sf.use_fullpel_costlist = 1;

    sf->rd_sf.tx_domain_dist_thres_level = 3;

    sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh = 0;
    sf->inter_sf.limit_inter_mode_cands = 4;
    sf->inter_sf.prune_warped_prob_thresh = 8;
    sf->inter_sf.extra_prune_warped = 1;

    sf->rt_sf.gf_refresh_based_on_qp = 1;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
    sf->rt_sf.var_part_split_threshold_shift = 7;
    if (!frame_is_intra_only(cm)) sf->rt_sf.var_part_based_on_qidx = 2;

    sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 3;
  }

  if (speed >= 7) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_1;
    sf->rt_sf.use_comp_ref_nonrd = 1;
    sf->rt_sf.ref_frame_comp_nonrd[2] = 1;  // LAST_ALTREF
    sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->part_sf.max_intra_bsize = BLOCK_32X32;

    sf->mv_sf.search_method = FAST_DIAMOND;
    sf->mv_sf.subpel_force_stop = QUARTER_PEL;

    sf->inter_sf.inter_mode_rd_model_estimation = 2;
    // This sf is not applicable in non-rd path.
    sf->inter_sf.skip_newmv_in_drl = 0;

    sf->interp_sf.skip_interp_filter_search = 0;

    // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't
    // good. May need more study.
    for (int i = 0; i < TX_SIZES; ++i) {
      sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
    }

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL5;

    // OR-ed in on top of the baseline flags, so this flag is also set on key
    // frames, where the baseline assignment left the flags at 0.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.nonrd_prune_ref_frame_search = 1;
    // This is for rd path only.
    sf->rt_sf.prune_inter_modes_using_temp_var = 0;
    sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
    sf->rt_sf.prune_intra_mode_based_on_mv_range = 0;
#if !CONFIG_REALTIME_ONLY
    sf->rt_sf.reuse_inter_pred_nonrd =
        (cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0);
#else
    sf->rt_sf.reuse_inter_pred_nonrd = 1;
#endif
#if CONFIG_AV1_TEMPORAL_DENOISING
    // NOTE(review): when this is compiled in, it replaces the value chosen
    // just above (including the warped-motion check) -- confirm intended.
    sf->rt_sf.reuse_inter_pred_nonrd = (cpi->oxcf.noise_sensitivity == 0);
#endif
    sf->rt_sf.short_circuit_low_temp_var = 0;
    // For spatial layers, only LAST and GOLDEN are currently used in the SVC
    // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
    // get_ref_frame_flags() for some patterns, so disable it here for
    // spatial layers.
    sf->rt_sf.use_nonrd_altref_frame =
        (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 3;
    sf->rt_sf.skip_intra_pred = 1;
    sf->rt_sf.source_metrics_sb_nonrd = 1;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i) {
      if (i >= BLOCK_32X32) {
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      } else {
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
      }
    }

    sf->winner_mode_sf.dc_blk_pred_level = 0;
    sf->rt_sf.var_part_based_on_qidx = 3;
    sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
    sf->rt_sf.skip_compound_based_on_var = true;
    sf->rt_sf.use_adaptive_subpel_search = true;
  }

  if (speed >= 8) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_2;
    sf->intra_sf.intra_pruning_with_hog = 1;
    sf->rt_sf.short_circuit_low_temp_var = 1;
    sf->rt_sf.use_nonrd_altref_frame = 0;
    sf->rt_sf.nonrd_prune_ref_frame_search = 2;
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.var_part_split_threshold_shift = 8;
    sf->rt_sf.var_part_based_on_qidx = 4;
    sf->rt_sf.partition_direct_merging = 1;
    sf->rt_sf.prune_compoundmode_with_singlemode_var = false;
    sf->mv_sf.use_bsize_dependent_search_method = 2;
    sf->rt_sf.prune_hv_pred_modes_using_src_sad = true;
  }
  if (speed >= 9) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
    sf->rt_sf.estimate_motion_for_var_based_partition = 3;
    sf->rt_sf.prefer_large_partition_blocks = 3;
    sf->rt_sf.skip_intra_pred = 2;
    sf->rt_sf.var_part_split_threshold_shift = 9;
    // Restrict every block size to the DC-only mask.
    for (int i = 0; i < BLOCK_SIZES; ++i) {
      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
    }
    sf->rt_sf.var_part_based_on_qidx = 0;
    sf->rt_sf.frame_level_mode_cost_update = true;
    sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
    sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
    sf->rt_sf.use_adaptive_subpel_search = true;
    sf->mv_sf.use_bsize_dependent_search_method = 0;
  }
  if (speed >= 10) {
    sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
    sf->rt_sf.nonrd_prune_ref_frame_search = 3;
    sf->rt_sf.var_part_split_threshold_shift = 10;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
  }
  // Speed 11 and above: screen content on non-intra-only frames gets
  // dc_blk_pred_level 3 again (the speed >= 7 section reset it to 0).
  if (speed >= 11 && !frame_is_intra_only(cm) &&
      cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
    sf->winner_mode_sf.dc_blk_pred_level = 3;
  }
}
1938
init_hl_sf(HIGH_LEVEL_SPEED_FEATURES * hl_sf)1939 static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
1940 // best quality defaults
1941 hl_sf->frame_parameter_update = 1;
1942 hl_sf->recode_loop = ALLOW_RECODE;
1943 // Recode loop tolerance %.
1944 hl_sf->recode_tolerance = 25;
1945 hl_sf->high_precision_mv_usage = CURRENT_Q;
1946 hl_sf->superres_auto_search_type = SUPERRES_AUTO_ALL;
1947 hl_sf->disable_extra_sc_testing = 0;
1948 hl_sf->second_alt_ref_filtering = 1;
1949 hl_sf->adjust_num_frames_for_arf_filtering = 0;
1950 hl_sf->accurate_bit_estimate = 0;
1951 hl_sf->weight_calc_level_in_tf = 0;
1952 hl_sf->allow_sub_blk_me_in_tf = 0;
1953 }
1954
init_fp_sf(FIRST_PASS_SPEED_FEATURES * fp_sf)1955 static AOM_INLINE void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
1956 fp_sf->reduce_mv_step_param = 3;
1957 fp_sf->skip_motion_search_threshold = 0;
1958 fp_sf->disable_recon = 0;
1959 fp_sf->skip_zeromv_motion_search = 0;
1960 }
1961
init_tpl_sf(TPL_SPEED_FEATURES * tpl_sf)1962 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
1963 tpl_sf->gop_length_decision_method = 0;
1964 tpl_sf->prune_intra_modes = 0;
1965 tpl_sf->prune_starting_mv = 0;
1966 tpl_sf->reduce_first_step_size = 0;
1967 tpl_sf->skip_alike_starting_mv = 0;
1968 tpl_sf->subpel_force_stop = EIGHTH_PEL;
1969 tpl_sf->search_method = NSTEP;
1970 tpl_sf->prune_ref_frames_in_tpl = 0;
1971 tpl_sf->allow_compound_pred = 1;
1972 tpl_sf->use_y_only_rate_distortion = 0;
1973 tpl_sf->use_sad_for_mode_decision = 0;
1974 tpl_sf->reduce_num_frames = 0;
1975 }
1976
init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES * gm_sf)1977 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
1978 gm_sf->gm_search_type = GM_FULL_SEARCH;
1979 gm_sf->prune_ref_frame_for_gm_search = 0;
1980 gm_sf->prune_zero_mv_with_sse = 0;
1981 gm_sf->disable_gm_search_based_on_stats = 0;
1982 gm_sf->downsample_level = 0;
1983 gm_sf->num_refinement_steps = GM_MAX_REFINEMENT_STEPS;
1984 }
1985
init_part_sf(PARTITION_SPEED_FEATURES * part_sf)1986 static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
1987 part_sf->partition_search_type = SEARCH_PARTITION;
1988 part_sf->less_rectangular_check_level = 0;
1989 part_sf->use_square_partition_only_threshold = BLOCK_128X128;
1990 part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
1991 part_sf->default_max_partition_size = BLOCK_LARGEST;
1992 part_sf->default_min_partition_size = BLOCK_4X4;
1993 part_sf->adjust_var_based_rd_partitioning = 0;
1994 part_sf->max_intra_bsize = BLOCK_LARGEST;
1995 // This setting only takes effect when partition_search_type is set
1996 // to FIXED_PARTITION.
1997 part_sf->fixed_partition_size = BLOCK_16X16;
1998 // Recode loop tolerance %.
1999 part_sf->partition_search_breakout_dist_thr = 0;
2000 part_sf->partition_search_breakout_rate_thr = 0;
2001 part_sf->prune_ext_partition_types_search_level = 0;
2002 part_sf->prune_part4_search = 0;
2003 part_sf->ml_prune_partition = 0;
2004 part_sf->ml_early_term_after_part_split_level = 0;
2005 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
2006 part_sf->ml_partition_search_breakout_thresh[i] =
2007 -1; // -1 means not enabled.
2008 }
2009 part_sf->simple_motion_search_prune_agg = SIMPLE_AGG_LVL0;
2010 part_sf->simple_motion_search_split = 0;
2011 part_sf->simple_motion_search_prune_rect = 0;
2012 part_sf->simple_motion_search_early_term_none = 0;
2013 part_sf->simple_motion_search_reduce_search_steps = 0;
2014 part_sf->intra_cnn_based_part_prune_level = 0;
2015 part_sf->ext_partition_eval_thresh = BLOCK_8X8;
2016 part_sf->rect_partition_eval_thresh = BLOCK_128X128;
2017 part_sf->ext_part_eval_based_on_cur_best = 0;
2018 part_sf->prune_ext_part_using_split_info = 0;
2019 part_sf->prune_rectangular_split_based_on_qidx = 0;
2020 part_sf->prune_rect_part_using_4x4_var_deviation = false;
2021 part_sf->prune_rect_part_using_none_pred_mode = false;
2022 part_sf->early_term_after_none_split = 0;
2023 part_sf->ml_predict_breakout_level = 0;
2024 part_sf->prune_sub_8x8_partition_level = 0;
2025 part_sf->simple_motion_search_rect_split = 0;
2026 part_sf->reuse_prev_rd_results_for_part_ab = 0;
2027 part_sf->reuse_best_prediction_for_part_ab = 0;
2028 part_sf->use_best_rd_for_pruning = 0;
2029 part_sf->skip_non_sq_part_based_on_none = 0;
2030 part_sf->disable_8x8_part_based_on_qidx = 0;
2031 }
2032
init_mv_sf(MV_SPEED_FEATURES * mv_sf)2033 static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
2034 mv_sf->full_pixel_search_level = 0;
2035 mv_sf->auto_mv_step_size = 0;
2036 mv_sf->exhaustive_searches_thresh = 0;
2037 mv_sf->obmc_full_pixel_search_level = 0;
2038 mv_sf->prune_mesh_search = PRUNE_MESH_SEARCH_DISABLED;
2039 mv_sf->reduce_search_range = 0;
2040 mv_sf->search_method = NSTEP;
2041 mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL;
2042 mv_sf->subpel_force_stop = EIGHTH_PEL;
2043 mv_sf->subpel_iters_per_step = 2;
2044 mv_sf->subpel_search_method = SUBPEL_TREE;
2045 mv_sf->use_accurate_subpel_search = USE_8_TAPS;
2046 mv_sf->use_bsize_dependent_search_method = 0;
2047 mv_sf->use_fullpel_costlist = 0;
2048 mv_sf->use_downsampled_sad = 0;
2049 mv_sf->disable_extensive_joint_motion_search = 0;
2050 mv_sf->disable_second_mv = 0;
2051 mv_sf->skip_fullpel_search_using_startmv = 0;
2052 mv_sf->warp_search_method = WARP_SEARCH_SQUARE;
2053 mv_sf->warp_search_iters = 8;
2054 mv_sf->use_intrabc = 1;
2055 }
2056
init_inter_sf(INTER_MODE_SPEED_FEATURES * inter_sf)2057 static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
2058 inter_sf->adaptive_rd_thresh = 0;
2059 inter_sf->model_based_post_interp_filter_breakout = 0;
2060 inter_sf->reduce_inter_modes = 0;
2061 inter_sf->alt_ref_search_fp = 0;
2062 inter_sf->prune_single_ref = 0;
2063 inter_sf->prune_comp_ref_frames = 0;
2064 inter_sf->selective_ref_frame = 0;
2065 inter_sf->prune_ref_frame_for_rect_partitions = 0;
2066 inter_sf->fast_wedge_sign_estimate = 0;
2067 inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
2068 inter_sf->reuse_inter_intra_mode = 0;
2069 inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB;
2070 inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB;
2071 inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB;
2072 inter_sf->prune_inter_modes_based_on_tpl = 0;
2073 inter_sf->prune_nearmv_using_neighbors = PRUNE_NEARMV_OFF;
2074 inter_sf->prune_comp_search_by_single_result = 0;
2075 inter_sf->skip_repeated_ref_mv = 0;
2076 inter_sf->skip_newmv_in_drl = 0;
2077 inter_sf->inter_mode_rd_model_estimation = 0;
2078 inter_sf->prune_compound_using_single_ref = 0;
2079 inter_sf->prune_ext_comp_using_neighbors = 0;
2080 inter_sf->skip_ext_comp_nearmv_mode = 0;
2081 inter_sf->prune_comp_using_best_single_mode_ref = 0;
2082 inter_sf->prune_nearest_near_mv_using_refmv_weight = 0;
2083 inter_sf->disable_onesided_comp = 0;
2084 inter_sf->prune_mode_search_simple_translation = 0;
2085 inter_sf->prune_comp_type_by_comp_avg = 0;
2086 inter_sf->disable_interinter_wedge_newmv_search = 0;
2087 inter_sf->fast_interintra_wedge_search = 0;
2088 inter_sf->prune_comp_type_by_model_rd = 0;
2089 inter_sf->perform_best_rd_based_gating_for_chroma = 0;
2090 inter_sf->prune_obmc_prob_thresh = 0;
2091 inter_sf->disable_interinter_wedge_var_thresh = 0;
2092 inter_sf->disable_interintra_wedge_var_thresh = 0;
2093 inter_sf->prune_ref_mv_idx_search = 0;
2094 inter_sf->prune_warped_prob_thresh = 0;
2095 inter_sf->reuse_compound_type_decision = 0;
2096 inter_sf->prune_inter_modes_if_skippable = 0;
2097 inter_sf->disable_masked_comp = 0;
2098 inter_sf->enable_fast_compound_mode_search = 0;
2099 inter_sf->reuse_mask_search_results = 0;
2100 inter_sf->enable_fast_wedge_mask_search = 0;
2101 inter_sf->inter_mode_txfm_breakout = 0;
2102 inter_sf->limit_inter_mode_cands = 0;
2103 inter_sf->limit_txfm_eval_per_mode = 0;
2104 inter_sf->skip_arf_compound = 0;
2105 set_txfm_rd_gate_level(inter_sf->txfm_rd_gate_level, 0);
2106 }
2107
init_interp_sf(INTERP_FILTER_SPEED_FEATURES * interp_sf)2108 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
2109 interp_sf->adaptive_interp_filter_search = 0;
2110 interp_sf->cb_pred_filter_search = 0;
2111 interp_sf->disable_dual_filter = 0;
2112 interp_sf->skip_sharp_interp_filter_search = 0;
2113 interp_sf->use_fast_interpolation_filter_search = 0;
2114 interp_sf->use_interp_filter = 0;
2115 interp_sf->skip_interp_filter_search = 0;
2116 }
2117
init_intra_sf(INTRA_MODE_SPEED_FEATURES * intra_sf)2118 static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
2119 intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB;
2120 intra_sf->skip_intra_in_interframe = 1;
2121 intra_sf->intra_pruning_with_hog = 0;
2122 intra_sf->chroma_intra_pruning_with_hog = 0;
2123 intra_sf->prune_palette_search_level = 0;
2124 intra_sf->prune_luma_palette_size_search_level = 0;
2125
2126 for (int i = 0; i < TX_SIZES; i++) {
2127 intra_sf->intra_y_mode_mask[i] = INTRA_ALL;
2128 intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
2129 }
2130 intra_sf->disable_smooth_intra = 0;
2131 intra_sf->prune_smooth_intra_mode_for_chroma = 0;
2132 intra_sf->prune_filter_intra_level = 0;
2133 intra_sf->prune_chroma_modes_using_luma_winner = 0;
2134 intra_sf->cfl_search_range = 3;
2135 intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT;
2136 intra_sf->adapt_top_model_rd_count_using_neighbors = 0;
2137 intra_sf->early_term_chroma_palette_size_search = 0;
2138 intra_sf->skip_filter_intra_in_inter_frames = 0;
2139 intra_sf->prune_luma_odd_delta_angles_in_intra = 0;
2140 }
2141
init_tx_sf(TX_SPEED_FEATURES * tx_sf)2142 static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
2143 tx_sf->inter_tx_size_search_init_depth_sqr = 0;
2144 tx_sf->inter_tx_size_search_init_depth_rect = 0;
2145 tx_sf->intra_tx_size_search_init_depth_rect = 0;
2146 tx_sf->intra_tx_size_search_init_depth_sqr = 0;
2147 tx_sf->tx_size_search_lgr_block = 0;
2148 tx_sf->model_based_prune_tx_search_level = 0;
2149 tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1;
2150 tx_sf->tx_type_search.ml_tx_split_thresh = 8500;
2151 tx_sf->tx_type_search.use_skip_flag_prediction = 1;
2152 tx_sf->tx_type_search.use_reduced_intra_txset = 0;
2153 tx_sf->tx_type_search.fast_intra_tx_type_search = 0;
2154 tx_sf->tx_type_search.fast_inter_tx_type_prob_thresh = INT_MAX;
2155 tx_sf->tx_type_search.skip_tx_search = 0;
2156 tx_sf->tx_type_search.prune_tx_type_using_stats = 0;
2157 tx_sf->tx_type_search.prune_tx_type_est_rd = 0;
2158 tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;
2159 tx_sf->txb_split_cap = 1;
2160 tx_sf->adaptive_txb_search_level = 0;
2161 tx_sf->refine_fast_tx_search_results = 1;
2162 tx_sf->prune_tx_size_level = 0;
2163 tx_sf->prune_intra_tx_depths_using_nn = false;
2164 tx_sf->use_rd_based_breakout_for_intra_tx_search = false;
2165 }
2166
init_rd_sf(RD_CALC_SPEED_FEATURES * rd_sf,const AV1EncoderConfig * oxcf)2167 static AOM_INLINE void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
2168 const AV1EncoderConfig *oxcf) {
2169 const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant;
2170 if (disable_trellis_quant == 3) {
2171 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
2172 ? NO_ESTIMATE_YRD_TRELLIS_OPT
2173 : NO_TRELLIS_OPT;
2174 } else if (disable_trellis_quant == 2) {
2175 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
2176 ? FINAL_PASS_TRELLIS_OPT
2177 : NO_TRELLIS_OPT;
2178 } else if (disable_trellis_quant == 0) {
2179 if (is_lossless_requested(&oxcf->rc_cfg)) {
2180 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
2181 } else {
2182 rd_sf->optimize_coefficients = FULL_TRELLIS_OPT;
2183 }
2184 } else if (disable_trellis_quant == 1) {
2185 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
2186 } else {
2187 assert(0 && "Invalid disable_trellis_quant value");
2188 }
2189 rd_sf->use_mb_rd_hash = 0;
2190 rd_sf->simple_model_rd_from_var = 0;
2191 rd_sf->tx_domain_dist_level = 0;
2192 rd_sf->tx_domain_dist_thres_level = 0;
2193 rd_sf->perform_coeff_opt = 0;
2194 }
2195
init_winner_mode_sf(WINNER_MODE_SPEED_FEATURES * winner_mode_sf)2196 static AOM_INLINE void init_winner_mode_sf(
2197 WINNER_MODE_SPEED_FEATURES *winner_mode_sf) {
2198 winner_mode_sf->motion_mode_for_winner_cand = 0;
2199 // Set this at the appropriate speed levels
2200 winner_mode_sf->tx_size_search_level = 0;
2201 winner_mode_sf->enable_winner_mode_for_coeff_opt = 0;
2202 winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
2203 winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;
2204 winner_mode_sf->multi_winner_mode_type = 0;
2205 winner_mode_sf->dc_blk_pred_level = 0;
2206 winner_mode_sf->winner_mode_ifs = 0;
2207 winner_mode_sf->prune_winner_mode_eval_level = 0;
2208 }
2209
init_lpf_sf(LOOP_FILTER_SPEED_FEATURES * lpf_sf)2210 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
2211 lpf_sf->disable_loop_restoration_chroma = 0;
2212 lpf_sf->disable_loop_restoration_luma = 0;
2213 lpf_sf->min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE;
2214 lpf_sf->max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2215 lpf_sf->prune_wiener_based_on_src_var = 0;
2216 lpf_sf->prune_sgr_based_on_wiener = 0;
2217 lpf_sf->enable_sgr_ep_pruning = 0;
2218 lpf_sf->reduce_wiener_window_size = 0;
2219 lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
2220 lpf_sf->use_coarse_filter_level_search = 0;
2221 lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;
2222 // Set decoder side speed feature to use less dual sgr modes
2223 lpf_sf->dual_sgr_penalty_level = 0;
2224 // Enable Wiener and Self-guided Loop restoration filters by default.
2225 lpf_sf->disable_wiener_filter = false;
2226 lpf_sf->disable_sgr_filter = false;
2227 lpf_sf->disable_wiener_coeff_refine_search = false;
2228 lpf_sf->use_downsampled_wiener_stats = 0;
2229 }
2230
init_rt_sf(REAL_TIME_SPEED_FEATURES * rt_sf)2231 static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
2232 rt_sf->check_intra_pred_nonrd = 0;
2233 rt_sf->skip_intra_pred = 0;
2234 rt_sf->estimate_motion_for_var_based_partition = 0;
2235 rt_sf->nonrd_check_partition_merge_mode = 0;
2236 rt_sf->nonrd_check_partition_split = 0;
2237 rt_sf->mode_search_skip_flags = 0;
2238 rt_sf->nonrd_prune_ref_frame_search = 0;
2239 rt_sf->use_nonrd_pick_mode = 0;
2240 rt_sf->use_nonrd_altref_frame = 0;
2241 rt_sf->use_comp_ref_nonrd = 0;
2242 rt_sf->use_real_time_ref_set = 0;
2243 rt_sf->short_circuit_low_temp_var = 0;
2244 rt_sf->reuse_inter_pred_nonrd = 0;
2245 rt_sf->num_inter_modes_for_tx_search = INT_MAX;
2246 rt_sf->use_nonrd_filter_search = 0;
2247 rt_sf->use_simple_rd_model = 0;
2248 rt_sf->hybrid_intra_pickmode = 0;
2249 rt_sf->source_metrics_sb_nonrd = 0;
2250 rt_sf->overshoot_detection_cbr = NO_DETECTION;
2251 rt_sf->check_scene_detection = 0;
2252 rt_sf->prefer_large_partition_blocks = 0;
2253 rt_sf->use_temporal_noise_estimate = 0;
2254 rt_sf->fullpel_search_step_param = 0;
2255 for (int i = 0; i < BLOCK_SIZES; ++i)
2256 rt_sf->intra_y_mode_bsize_mask_nrd[i] = INTRA_ALL;
2257 rt_sf->prune_hv_pred_modes_using_src_sad = false;
2258 rt_sf->nonrd_aggressive_skip = 0;
2259 rt_sf->skip_cdef_sb = 0;
2260 rt_sf->force_large_partition_blocks_intra = 0;
2261 rt_sf->skip_tx_no_split_var_based_partition = 0;
2262 rt_sf->skip_newmv_mode_based_on_sse = 0;
2263 rt_sf->gf_length_lvl = 0;
2264 rt_sf->prune_inter_modes_with_golden_ref = 0;
2265 rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
2266 rt_sf->prune_inter_modes_using_temp_var = 0;
2267 rt_sf->reduce_mv_pel_precision_highmotion = 0;
2268 rt_sf->reduce_mv_pel_precision_lowcomplex = 0;
2269 rt_sf->prune_intra_mode_based_on_mv_range = 0;
2270 rt_sf->var_part_split_threshold_shift = 7;
2271 rt_sf->gf_refresh_based_on_qp = 0;
2272 rt_sf->use_rtc_tf = 0;
2273 rt_sf->prune_idtx_nonrd = 0;
2274 rt_sf->prune_palette_nonrd = 0;
2275 rt_sf->dct_only_palette_nonrd = 0;
2276 rt_sf->part_early_exit_zeromv = 0;
2277 rt_sf->sse_early_term_inter_search = EARLY_TERM_DISABLED;
2278 rt_sf->skip_lf_screen = 0;
2279 rt_sf->thresh_active_maps_skip_lf_cdef = 100;
2280 rt_sf->sad_based_adp_altref_lag = 0;
2281 rt_sf->partition_direct_merging = 0;
2282 rt_sf->var_part_based_on_qidx = 0;
2283 rt_sf->tx_size_level_based_on_qstep = 0;
2284 rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
2285 rt_sf->prune_compoundmode_with_singlecompound_var = false;
2286 rt_sf->frame_level_mode_cost_update = false;
2287 rt_sf->prune_h_pred_using_best_mode_so_far = false;
2288 rt_sf->enable_intra_mode_pruning_using_neighbors = false;
2289 rt_sf->prune_intra_mode_using_best_sad_so_far = false;
2290 rt_sf->check_only_zero_zeromv_on_large_blocks = false;
2291 rt_sf->disable_cdf_update_non_reference_frame = false;
2292 rt_sf->prune_compoundmode_with_singlemode_var = false;
2293 rt_sf->skip_compound_based_on_var = false;
2294 rt_sf->set_zeromv_skip_based_on_source_sad = 1;
2295 rt_sf->use_adaptive_subpel_search = false;
2296 rt_sf->screen_content_cdef_filter_qindex_thresh = 0;
2297 rt_sf->enable_ref_short_signaling = false;
2298 rt_sf->check_globalmv_on_single_ref = true;
2299 rt_sf->increase_color_thresh_palette = false;
2300 rt_sf->selective_cdf_update = 0;
2301 rt_sf->force_only_last_ref = 0;
2302 }
2303
// Table of sub-pel (fractional) motion vector search routines.
// set_subpel_search_method() indexes it with a SUBPEL_SEARCH_METHOD value, so
// the entry order must stay in sync with the enum values noted on each line.
static fractional_mv_step_fp
    *const fractional_mv_search[SUBPEL_SEARCH_METHODS] = {
      av1_find_best_sub_pixel_tree,             // SUBPEL_TREE = 0
      av1_find_best_sub_pixel_tree_pruned,      // SUBPEL_TREE_PRUNED = 1
      av1_find_best_sub_pixel_tree_pruned_more  // SUBPEL_TREE_PRUNED_MORE = 2
    };
2310
2311 // Populate appropriate sub-pel search method based on speed feature and user
2312 // specified settings
set_subpel_search_method(MotionVectorSearchParams * mv_search_params,unsigned int motion_vector_unit_test,SUBPEL_SEARCH_METHOD subpel_search_method)2313 static void set_subpel_search_method(
2314 MotionVectorSearchParams *mv_search_params,
2315 unsigned int motion_vector_unit_test,
2316 SUBPEL_SEARCH_METHOD subpel_search_method) {
2317 assert(subpel_search_method <= SUBPEL_TREE_PRUNED_MORE);
2318 mv_search_params->find_fractional_mv_step =
2319 fractional_mv_search[subpel_search_method];
2320
2321 // This is only used in motion vector unit test.
2322 if (motion_vector_unit_test == 1)
2323 mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
2324 else if (motion_vector_unit_test == 2)
2325 mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
2326 }
2327
// Applies the frame-size-dependent speed features for the configured encoding
// mode, then derives the settings that follow from them: sequence-level tool
// flags (only while the sequence parameters are not locked), the sub-pel
// search routine, and the MV cost update level under row multi-threading.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  SPEED_FEATURES *const sf = &cpi->sf;

  // Dispatch to the mode-specific setter.
  switch (oxcf->mode) {
    case GOOD:
      set_good_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
    case REALTIME:
      set_rt_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
  }

  // The sequence-level flags are only AND-ed down here, never turned on, and
  // only while the sequence parameters are still unlocked.
  if (!cpi->ppi->seq_params_locked) {
    const int masked_comp_allowed = !sf->inter_sf.disable_masked_comp;
    const int interintra_allowed =
        sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX;
    cpi->common.seq_params->enable_masked_compound &= masked_comp_allowed;
    cpi->common.seq_params->enable_interintra_compound &= interintra_allowed;
  }

  set_subpel_search_method(&cpi->mv_search_params,
                           oxcf->unit_test_cfg.motion_vector_unit_test,
                           sf->mv_sf.subpel_search_method);

  // For the multi-thread use case with row_mt enabled, a cost update for a
  // set of SB rows is not desirable, so INTERNAL_COST_UPD_SBROW_SET is
  // replaced by the per-row level INTERNAL_COST_UPD_SBROW.
  const int row_mt_with_workers =
      (oxcf->row_mt == 1) && (cpi->mt_info.num_workers > 1);
  if (row_mt_with_workers &&
      sf->inter_sf.mv_cost_upd_level == INTERNAL_COST_UPD_SBROW_SET) {
    sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
  }
}
2365
// Sets the speed features that do not depend on the frame dimensions.
//
// The steps, in order:
//   1. Call the init_*_sf() helper of every speed-feature group.
//   2. Apply the mode-specific (GOOD / ALLINTRA / REALTIME) settings.
//   3. Apply overrides driven by the encoder configuration and worker count.
//   4. Narrow the sequence-level tool flags (only while unlocked).
//   5. Copy the mesh patterns and refresh the winner-mode parameter tables
//      from the selected levels.
// The order matters: later steps read values written by earlier ones.
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  SPEED_FEATURES *const sf = &cpi->sf;
  WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;

  // Step 1: reset every feature group.
  init_hl_sf(&sf->hl_sf);
  init_fp_sf(&sf->fp_sf);
  init_tpl_sf(&sf->tpl_sf);
  init_gm_sf(&sf->gm_sf);
  init_part_sf(&sf->part_sf);
  init_mv_sf(&sf->mv_sf);
  init_inter_sf(&sf->inter_sf);
  init_interp_sf(&sf->interp_sf);
  init_intra_sf(&sf->intra_sf);
  init_tx_sf(&sf->tx_sf);
  init_rd_sf(&sf->rd_sf, oxcf);
  init_winner_mode_sf(&sf->winner_mode_sf);
  init_lpf_sf(&sf->lpf_sf);
  init_rt_sf(&sf->rt_sf);

  // Step 2: mode-specific settings.
  switch (oxcf->mode) {
    case GOOD:
      set_good_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case REALTIME:
      set_rt_speed_features_framesize_independent(cpi, sf, speed);
      break;
  }

  // Note: when use_nonrd_pick_mode is true, the transform size is the
  // minimum of 16x16 and the largest possible size of the current block,
  // which conflicts with the speed feature "enable_tx_size_search". The
  // override is therefore applied only when use_nonrd_pick_mode is 0.
  if (sf->rt_sf.use_nonrd_pick_mode == 0 &&
      !oxcf->txfm_cfg.enable_tx_size_search) {
    sf->winner_mode_sf.tx_size_search_level = 3;
  }

  // Loop restoration is disabled for speed >= 5 when more than one worker is
  // in use. av1_pick_filter_restoration() is not multi-threaded, so enabling
  // the loop restoration stage would increase encode time (3% to 7%,
  // depending on frame resolution).
  // TODO(aomedia:3446): Implement multi-threading of
  // av1_pick_filter_restoration() and enable Wiener filter for speed 5, 6
  // similar to single thread encoding path.
  if (cpi->mt_info.num_workers > 1 && speed >= 5) {
    sf->lpf_sf.disable_sgr_filter = true;
    sf->lpf_sf.disable_wiener_filter = true;
  }

  // Step 4: sequence-level flags are only AND-ed down, and only while the
  // sequence parameters are still unlocked.
  if (!cpi->ppi->seq_params_locked) {
    const int dist_wtd_comp_allowed =
        sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED;
    const int dual_filter_allowed = !sf->interp_sf.disable_dual_filter;
    // Restoration stays enabled if at least one of the loop restoration
    // filters (Wiener or self-guided) is not disabled.
    const int any_lr_filter_enabled = !(sf->lpf_sf.disable_wiener_filter &&
                                        sf->lpf_sf.disable_sgr_filter);
    const int interintra_allowed =
        sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX;

    cpi->common.seq_params->order_hint_info.enable_dist_wtd_comp &=
        dist_wtd_comp_allowed;
    cpi->common.seq_params->enable_dual_filter &= dual_filter_allowed;
    cpi->common.seq_params->enable_restoration &= any_lr_filter_enabled;
    cpi->common.seq_params->enable_interintra_compound &= interintra_allowed;
  }

  // Step 5a: copy the mesh patterns for the (clamped) speed. The intraBC
  // pattern is populated for all frame types, but it is used only for intra
  // frames of screen contents.
  const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
  for (int step = 0; step < MAX_MESH_STEP; ++step) {
    sf->mv_sf.mesh_patterns[step].range =
        good_quality_mesh_patterns[mesh_speed][step].range;
    sf->mv_sf.mesh_patterns[step].interval =
        good_quality_mesh_patterns[mesh_speed][step].interval;
    sf->mv_sf.intrabc_mesh_patterns[step].range =
        intrabc_mesh_patterns[mesh_speed][step].range;
    sf->mv_sf.intrabc_mesh_patterns[step].interval =
        intrabc_mesh_patterns[mesh_speed][step].interval;
  }

  // Slow quant, dct and trellis are not worthwhile for the first pass, so
  // make sure trellis optimization is always turned off there.
  if (is_stat_generation_stage(cpi)) {
    sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
  }

  // No recode for 1 pass.
  if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi)) {
    sf->hl_sf.recode_loop = DISALLOW_RECODE;
  }

  set_subpel_search_method(&cpi->mv_search_params,
                           oxcf->unit_test_cfg.motion_vector_unit_test,
                           sf->mv_sf.subpel_search_method);

  // Step 5b: refresh the winner-mode parameter tables. Each assert checks the
  // index used by the memcpy that immediately follows it.

  // Index into tx_domain_dist_thresholds.
  assert(sf->rd_sf.tx_domain_dist_thres_level >= 0 &&
         sf->rd_sf.tx_domain_dist_thres_level < 4);
  memcpy(winner_mode_params->tx_domain_dist_threshold,
         tx_domain_dist_thresholds[sf->rd_sf.tx_domain_dist_thres_level],
         sizeof(winner_mode_params->tx_domain_dist_threshold));

  // Index into tx_domain_dist_types.
  assert(sf->rd_sf.tx_domain_dist_level >= 0 &&
         sf->rd_sf.tx_domain_dist_level < TX_DOMAIN_DIST_LEVELS);
  memcpy(winner_mode_params->use_transform_domain_distortion,
         tx_domain_dist_types[sf->rd_sf.tx_domain_dist_level],
         sizeof(winner_mode_params->use_transform_domain_distortion));

  // Index into coeff_opt_thresholds.
  assert(sf->rd_sf.perform_coeff_opt >= 0 && sf->rd_sf.perform_coeff_opt < 9);
  memcpy(winner_mode_params->coeff_opt_thresholds,
         &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
         sizeof(winner_mode_params->coeff_opt_thresholds));

  // Index into predict_skip_levels.
  assert(sf->tx_sf.tx_type_search.use_skip_flag_prediction >= 0 &&
         sf->tx_sf.tx_type_search.use_skip_flag_prediction < 3);
  memcpy(winner_mode_params->skip_txfm_level,
         predict_skip_levels[sf->tx_sf.tx_type_search.use_skip_flag_prediction],
         sizeof(winner_mode_params->skip_txfm_level));

  // Index into tx_size_search_methods.
  assert(sf->winner_mode_sf.tx_size_search_level >= 0 &&
         sf->winner_mode_sf.tx_size_search_level <= 3);
  memcpy(winner_mode_params->tx_size_search_methods,
         tx_size_search_methods[sf->winner_mode_sf.tx_size_search_level],
         sizeof(winner_mode_params->tx_size_search_methods));

  // NOTE(review): unlike the lookups above, no assert guards the
  // dc_blk_pred_level index here -- confirm its valid range.
  memcpy(winner_mode_params->predict_dc_level,
         predict_dc_levels[sf->winner_mode_sf.dc_blk_pred_level],
         sizeof(winner_mode_params->predict_dc_level));

  // Adjustments for row multi-threading with more than one worker.
  if (oxcf->row_mt == 1 && cpi->mt_info.num_workers > 1) {
    // Revert inter_mode_rd_model_estimation from type 1 to type 2.
    if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
      sf->inter_sf.inter_mode_rd_model_estimation = 2;
    }

#if !CONFIG_FPMT_TEST
    // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
    // better parallelism when the number of threads available is greater than
    // or equal to the maximum number of reference frames allowed for global
    // motion.
    if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
        (cpi->mt_info.num_workers >=
         gm_available_reference_frames[sf->gm_sf.gm_search_type]))
      sf->gm_sf.prune_ref_frame_for_gm_search = 0;
#endif
  }

  // This only applies to the real time mode. Adaptive gf refresh is disabled
  // if the user-specified gf_cbr_boost_pct is larger than 0.
  if (oxcf->rc_cfg.gf_cbr_boost_pct > 0) {
    sf->rt_sf.gf_refresh_based_on_qp = 0;
  }
}
2527
2528 // Override some speed features based on qindex
av1_set_speed_features_qindex_dependent(AV1_COMP * cpi,int speed)2529 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
2530 AV1_COMMON *const cm = &cpi->common;
2531 SPEED_FEATURES *const sf = &cpi->sf;
2532 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
2533 const int boosted = frame_is_boosted(cpi);
2534 const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
2535 const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
2536 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
2537 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
2538 const int is_1440p_or_larger = AOMMIN(cm->width, cm->height) >= 1440;
2539 const int is_arf2_bwd_type =
2540 cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
2541
2542 if (cpi->oxcf.mode == REALTIME) {
2543 if (speed >= 6) {
2544 const int qindex_thresh = boosted ? 190 : (is_720p_or_larger ? 120 : 150);
2545 sf->part_sf.adjust_var_based_rd_partitioning =
2546 frame_is_intra_only(cm)
2547 ? 0
2548 : cm->quant_params.base_qindex > qindex_thresh;
2549 }
2550 return;
2551 }
2552
2553 if (speed == 0) {
2554 // qindex_thresh for resolution < 720p
2555 const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
2556 if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
2557 sf->part_sf.simple_motion_search_split =
2558 cm->features.allow_screen_content_tools ? 1 : 2;
2559 sf->part_sf.simple_motion_search_early_term_none = 1;
2560 sf->tx_sf.model_based_prune_tx_search_level = 0;
2561 }
2562
2563 if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) {
2564 sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger;
2565 memcpy(winner_mode_params->coeff_opt_thresholds,
2566 &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
2567 sizeof(winner_mode_params->coeff_opt_thresholds));
2568 sf->part_sf.simple_motion_search_split =
2569 cm->features.allow_screen_content_tools ? 1 : 2;
2570 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
2571 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
2572 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
2573 sf->tx_sf.model_based_prune_tx_search_level = 0;
2574
2575 if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
2576 sf->inter_sf.selective_ref_frame = 2;
2577 sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
2578 sf->rd_sf.tx_domain_dist_thres_level = 1;
2579 sf->part_sf.simple_motion_search_early_term_none = 1;
2580 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
2581 sf->interp_sf.cb_pred_filter_search = 0;
2582 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
2583 sf->tx_sf.tx_type_search.skip_tx_search = 1;
2584 }
2585 }
2586 }
2587
2588 if (speed >= 2) {
2589 // Disable extended partitions for lower quantizers
2590 const int aggr = AOMMIN(4, speed - 2);
2591 const int qindex_thresh1[4] = { 50, 50, 80, 100 };
2592 const int qindex_thresh2[4] = { 80, 100, 120, 160 };
2593 int qindex_thresh;
2594 if (aggr <= 1) {
2595 const int qthresh2 =
2596 (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr];
2597 qindex_thresh = cm->features.allow_screen_content_tools
2598 ? qindex_thresh1[aggr]
2599 : qthresh2;
2600 if (cm->quant_params.base_qindex <= qindex_thresh && !boosted)
2601 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2602 } else if (aggr <= 2) {
2603 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
2604 if (cm->quant_params.base_qindex <= qindex_thresh &&
2605 !frame_is_intra_only(cm))
2606 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2607 } else if (aggr <= 3) {
2608 if (!is_480p_or_larger) {
2609 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2610 } else if (!is_720p_or_larger && !frame_is_intra_only(cm) &&
2611 !cm->features.allow_screen_content_tools) {
2612 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2613 } else {
2614 qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
2615 if (cm->quant_params.base_qindex <= qindex_thresh &&
2616 !frame_is_intra_only(cm))
2617 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2618 }
2619 } else {
2620 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
2621 }
2622 }
2623
2624 if (speed >= 4) {
2625 // Disable rectangular partitions for lower quantizers
2626 const int aggr = AOMMIN(1, speed - 4);
2627 const int qindex_thresh[2] = { 65, 80 };
2628 int disable_rect_part;
2629 disable_rect_part = !boosted;
2630 if (cm->quant_params.base_qindex <= qindex_thresh[aggr] &&
2631 disable_rect_part && is_480p_or_larger) {
2632 sf->part_sf.rect_partition_eval_thresh = BLOCK_8X8;
2633 }
2634 }
2635
2636 if (speed <= 2) {
2637 if (!is_stat_generation_stage(cpi)) {
2638 // Use faster full-pel motion search for high quantizers.
2639 // Also use reduced total search range for low resolutions at high
2640 // quantizers.
2641 const int aggr = speed;
2642 const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0];
2643 const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1];
2644 const SEARCH_METHODS search_method =
2645 motion_search_method[is_720p_or_larger];
2646 if (cm->quant_params.base_qindex > qindex_thresh1) {
2647 sf->mv_sf.search_method = search_method;
2648 sf->tpl_sf.search_method = search_method;
2649 } else if (cm->quant_params.base_qindex > qindex_thresh2) {
2650 sf->mv_sf.search_method = NSTEP_8PT;
2651 }
2652 }
2653 }
2654
2655 if (speed >= 4) {
2656 // Disable LR search at low and high quantizers and enable only for
2657 // mid-quantizer range.
2658 if (!boosted && !is_arf2_bwd_type) {
2659 const int qindex_low[2] = { 100, 60 };
2660 const int qindex_high[2] = { 180, 160 };
2661 if (cm->quant_params.base_qindex <= qindex_low[is_720p_or_larger] ||
2662 cm->quant_params.base_qindex > qindex_high[is_720p_or_larger]) {
2663 sf->lpf_sf.disable_loop_restoration_luma = 1;
2664 }
2665 }
2666 }
2667
2668 if (speed == 1) {
2669 // Reuse interinter wedge mask search from first search for non-boosted
2670 // non-internal-arf frames, except at very high quantizers.
2671 if (cm->quant_params.base_qindex <= 200) {
2672 if (!boosted && !is_arf2_bwd_type)
2673 sf->inter_sf.reuse_mask_search_results = 1;
2674 }
2675 }
2676
2677 if (speed == 5) {
2678 if (!(frame_is_intra_only(&cpi->common) ||
2679 cm->features.allow_screen_content_tools)) {
2680 const int qindex[2] = { 256, 128 };
2681 // Set the sf value as 3 for low resolution and
2682 // for higher resolutions with low quantizers.
2683 if (cm->quant_params.base_qindex < qindex[is_480p_or_larger])
2684 sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
2685 }
2686 }
2687
2688 if (speed >= 5) {
2689 // Disable the sf for low quantizers in case of low resolution screen
2690 // contents.
2691 if (cm->features.allow_screen_content_tools &&
2692 cm->quant_params.base_qindex < 128 && is_480p_or_lesser) {
2693 sf->part_sf.prune_sub_8x8_partition_level = 0;
2694 }
2695 }
2696
2697 // Loop restoration size search
2698 // At speed 0, always search all available sizes for the maximum possible gain
2699 sf->lpf_sf.min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE;
2700 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2701
2702 if (speed >= 1) {
2703 // For large frames, small restoration units are almost never useful,
2704 // so prune them away
2705 if (is_1440p_or_larger) {
2706 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2707 } else if (is_720p_or_larger) {
2708 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2709 }
2710 }
2711
2712 if (speed >= 3 || (cpi->oxcf.mode == ALLINTRA && speed >= 1)) {
2713 // At this speed, a full search is too expensive. Instead, pick a single
2714 // size based on size and qindex. Note that, in general, higher quantizers
2715 // (== lower quality) and larger frames generally want to use larger
2716 // restoration units.
2717 int qindex_thresh = 96;
2718 if (cm->quant_params.base_qindex <= qindex_thresh && !is_1440p_or_larger) {
2719 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2720 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
2721 } else {
2722 sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2723 sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
2724 }
2725 }
2726
2727 set_subpel_search_method(&cpi->mv_search_params,
2728 cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
2729 sf->mv_sf.subpel_search_method);
2730 }
2731