1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13
14 #include "av1/encoder/encoder.h"
15 #include "av1/encoder/speed_features.h"
16 #include "av1/encoder/rdopt.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19
20 #define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
21 // Max speed setting for tx domain evaluation
22 #define MAX_TX_DOMAIN_EVAL_SPEED 5
23 static MESH_PATTERN
24 good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
25 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
26 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
27 { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
28 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
29 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
30 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
31 };
32 static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
33 50, 50, 25, 15, 5, 1
34 };
35
36 // TODO(huisu@google.com): These settings are pretty relaxed, tune them for
37 // each speed setting
38 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
39 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
40 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
41 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
42 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
43 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
44 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
45 };
46 static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100,
47 25, 25, 10 };
48
49 // Threshold values to be used for pruning the txfm_domain_distortion
50 // based on block MSE
51 // TODO(any): Experiment the threshold logic based on variance metric
52 static unsigned int tx_domain_dist_thresholds[MAX_TX_DOMAIN_EVAL_SPEED + 1] = {
53 UINT_MAX, 162754, 22026, 22026, 22026, 0
54 };
55 // Threshold values to be used for disabling coeff RD-optimization
56 // based on block MSE
57 // TODO(any): Experiment the threshold logic based on variance metric
58 static unsigned int coeff_opt_dist_thresholds[5] = { UINT_MAX, 162754, 162754,
59 22026, 22026 };
60 // scaling values to be used for gating wedge/compound segment based on best
61 // approximate rd
62 static int comp_type_rd_threshold_mul[3] = { 1, 11, 12 };
63 static int comp_type_rd_threshold_div[3] = { 3, 16, 16 };
64
65 // Intra only frames, golden frames (except alt ref overlays) and
66 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)67 static int frame_is_boosted(const AV1_COMP *cpi) {
68 return frame_is_kf_gf_arf(cpi);
69 }
70
71 // Sets a partition size down to which the auto partition code will always
72 // search (can go lower), based on the image dimensions. The logic here
73 // is that the extent to which ringing artefacts are offensive, depends
74 // partly on the screen area that over which they propogate. Propogation is
75 // limited by transform block size but the screen area take up by a given block
76 // size will be larger for a small image format stretched to full screen.
set_partition_min_limit(const AV1_COMMON * const cm)77 static BLOCK_SIZE set_partition_min_limit(const AV1_COMMON *const cm) {
78 unsigned int screen_area = (cm->width * cm->height);
79
80 // Select block size based on image format size.
81 if (screen_area < 1280 * 720) {
82 // Formats smaller in area than 720P
83 return BLOCK_4X4;
84 } else if (screen_area < 1920 * 1080) {
85 // Format >= 720P and < 1080P
86 return BLOCK_8X8;
87 } else {
88 // Formats 1080P and up
89 return BLOCK_16X16;
90 }
91 }
92
// Applies the frame-size-dependent speed features for GOOD mode.
//
// Frames are classified by their smaller dimension (>= 720, >= 480, or
// below). The speed-0 settings are applied first; each higher tier
// (speed >= 1, 2, 3, 4) is layered on top and overrides the fields it
// touches, so the function returns as soon as the requested tier is done.
//
//   cpi   - encoder instance; only cpi->common is read here.
//   sf    - speed feature set to update in place.
//   speed - requested speed level.
static void set_good_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int at_least_720p = min_dim >= 720;
  const int at_least_480p = min_dim >= 480;

  // Speed 0.
  if (at_least_720p) {
    sf->use_square_partition_only_threshold = BLOCK_128X128;
    sf->auto_max_partition_based_on_simple_motion = ADAPT_PRED;
  } else if (at_least_480p) {
    sf->use_square_partition_only_threshold = BLOCK_128X128;
    sf->auto_max_partition_based_on_simple_motion = RELAXED_PRED;
  } else {
    sf->use_square_partition_only_threshold = BLOCK_64X64;
    sf->auto_max_partition_based_on_simple_motion = DIRECT_PRED;
  }

  // TODO(huisu@google.com): train models for 720P and above.
  if (!at_least_720p) {
    sf->ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
    sf->ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
  }

  // Two-pass partition search is limited to 720p-and-up frames within the
  // configured speed window.
  if (at_least_720p && speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL_START &&
      speed < CONFIG_2PASS_PARTITION_SEARCH_LVL_END) {
    sf->two_pass_partition_search = 1;
  }

  if (speed < 1) return;

  // Speed 1.
  if (at_least_720p) {
    sf->use_square_partition_only_threshold = BLOCK_128X128;
  } else {
    sf->use_square_partition_only_threshold =
        at_least_480p ? BLOCK_64X64 : BLOCK_32X32;

    sf->ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
    sf->ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128

    sf->firstpass_simple_motion_search_early_term = 1;
  }

  if (speed < 2) return;

  // Speed 2.
  if (at_least_720p) {
    sf->use_square_partition_only_threshold = BLOCK_64X64;
    sf->adaptive_pred_interp_filter = 0;
    sf->partition_search_breakout_dist_thr = (1 << 24);
    sf->partition_search_breakout_rate_thr = 120;
  } else {
    // Both the 480p-to-720p class and the below-480p class use BLOCK_32X32.
    // TODO(chiyotsai@google.com): Setting the threshold to BLOCK_16X16 incurs
    // a large loss (about 0.584%) on the below-480p class. Try increasing the
    // threshold on boosted frame and see if it improves the performance.
    sf->use_square_partition_only_threshold = BLOCK_32X32;
    sf->partition_search_breakout_dist_thr = (1 << 22);
    sf->partition_search_breakout_rate_thr = 100;
  }
  sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);

  if (speed < 3) return;

  // Speed 3.
  if (at_least_720p) {
    sf->partition_search_breakout_dist_thr = (1 << 25);
    sf->partition_search_breakout_rate_thr = 200;
  } else {
    sf->max_intra_bsize = BLOCK_32X32;
    sf->partition_search_breakout_dist_thr = (1 << 23);
    sf->partition_search_breakout_rate_thr = 120;
  }
  // Mirrors whatever two_pass_partition_search currently holds.
  sf->use_first_partition_pass_interintra_stats =
      sf->two_pass_partition_search;

  if (speed < 4) return;

  // Speed 4.
  sf->partition_search_breakout_dist_thr =
      at_least_720p ? (1 << 26) : (1 << 24);
}
188
// Applies the frame-size-independent speed features for GOOD mode.
//
// The settings cascade: the speed-0 group is always applied, and each higher
// tier (speed >= 1 ... speed >= 8) is layered on top, overriding any field it
// assigns again. The function returns once the requested tier has been
// applied. Fields not assigned here keep whatever value `sf` held on entry.
//
//   cpi   - encoder instance (read only).
//   sf    - speed feature set to update in place.
//   speed - requested speed level.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int boosted = frame_is_boosted(cpi);
  const int not_boosted = !boosted;
  const int is_boosted_arf2_bwd_type =
      boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame;

  // ---- Speed 0 ----
  // Speed 0 for all speed features that give neutral coding performance change.
  sf->reduce_inter_modes = 1;
  sf->prune_ext_partition_types_search_level = 1;
  sf->ml_prune_rect_partition = 1;
  sf->ml_prune_ab_partition = 1;
  sf->ml_prune_4_partition = 1;
  sf->simple_motion_search_prune_rect = 1;
  sf->adaptive_txb_search_level = 1;
  sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
  sf->model_based_prune_tx_search_level = 1;
  sf->model_based_post_interp_filter_breakout = 1;
  sf->model_based_motion_mode_rd_breakout = 1;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_mode_rd_model_estimation = 1;
  sf->inter_mode_rd_model_estimation_adaptive = 0;

  sf->two_loop_comp_search = 0;
  // 0 on boosted frames, 1 on frames that refresh the bwd / alt2 reference,
  // 2 otherwise.
  if (boosted) {
    sf->prune_ref_frame_for_rect_partitions = 0;
  } else if (is_boosted_arf2_bwd_type) {
    sf->prune_ref_frame_for_rect_partitions = 1;
  } else {
    sf->prune_ref_frame_for_rect_partitions = 2;
  }
  sf->less_rectangular_check_level = 1;
  sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
  sf->gm_disable_recode = 1;
  sf->use_fast_interpolation_filter_search = 1;
  sf->intra_tx_size_search_init_depth_sqr = 1;
  sf->intra_angle_estimation = 1;
  sf->selective_ref_frame = 1;
  sf->prune_wedge_pred_diff_based = 1;
  sf->disable_wedge_search_var_thresh = 0;
  sf->disable_wedge_search_edge_thresh = 0;
  sf->prune_motion_mode_level = 1;
  sf->cb_pred_filter_search = 0;
  sf->use_nonrd_pick_mode = 0;
  sf->use_real_time_ref_set = 0;

  if (speed < 1) return;

  // ---- Speed 1 ----
  sf->gm_erroradv_type = GM_ERRORADV_TR_1;
  sf->selective_ref_frame = 2;

  sf->intra_tx_size_search_init_depth_rect = 1;
  sf->tx_size_search_lgr_block = 1;

  sf->prune_ext_partition_types_search_level = 2;
  sf->skip_repeat_interpolation_filter_search = 1;
  sf->tx_type_search.skip_tx_search = 1;
  sf->tx_type_search.ml_tx_split_thresh = 40;
  sf->model_based_prune_tx_search_level = 0;
  sf->adaptive_txb_search_level = 2;
  sf->use_intra_txb_hash = 1;
  sf->optimize_b_precheck = 1;
  sf->dual_sgr_penalty_level = 1;
  sf->use_accurate_subpel_search = USE_4_TAPS;
  sf->reuse_inter_intra_mode = 1;
  sf->prune_comp_search_by_single_result = 1;
  sf->skip_repeated_newmv = 1;
  sf->obmc_full_pixel_search_level = 1;
  // TODO(anyone): Following speed feature will be further explored to
  // identify the appropriate tradeoff between encoder performance and its
  // speed.
  sf->prune_single_motion_modes_by_simple_trans = 1;

  sf->simple_motion_search_split_only = 1;
  sf->simple_motion_search_early_term_none = 1;

  sf->disable_wedge_search_var_thresh = 0;
  sf->disable_wedge_search_edge_thresh = 0;
  sf->disable_interinter_wedge_newmv_search = not_boosted;
  sf->prune_comp_type_by_comp_avg = 1;
  sf->prune_motion_mode_level = 2;
  sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
  sf->cb_pred_filter_search = 1;
  sf->use_transform_domain_distortion = not_boosted;
  sf->perform_coeff_opt = not_boosted;
  sf->use_inter_txb_hash = 0;

  if (speed < 2) return;

  // ---- Speed 2 ----
  sf->gm_erroradv_type = GM_ERRORADV_TR_2;

  sf->selective_ref_frame = 3;
  sf->inter_tx_size_search_init_depth_rect = 1;
  sf->inter_tx_size_search_init_depth_sqr = 1;

  sf->fast_cdef_search = 1;

  sf->adaptive_rd_thresh = 1;
  sf->mv.auto_mv_step_size = 1;
  sf->mv.subpel_iters_per_step = 1;
  sf->disable_filter_search_var_thresh = 100;
  sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;

  sf->partition_search_breakout_rate_thr = 80;
  sf->allow_partition_search_skip = 1;
  sf->disable_wedge_search_var_thresh = 100;
  sf->disable_wedge_search_edge_thresh = 0;
  sf->disable_interinter_wedge_newmv_search = 1;
  sf->fast_wedge_sign_estimate = 1;
  sf->disable_dual_filter = 1;
  sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
  sf->prune_comp_type_by_comp_avg = 2;
  // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
  sf->cb_pred_filter_search = 0;
  sf->adaptive_interp_filter_search = 1;
  sf->perform_coeff_opt = not_boosted ? 2 : 0;

  if (speed < 3) return;

  // ---- Speed 3 ----
  if (boosted) {
    sf->tx_size_search_method = USE_FULL_RD;
  } else {
    sf->tx_size_search_method = USE_LARGESTALL;
  }
  sf->less_rectangular_check_level = 2;
  sf->adaptive_pred_interp_filter = 1;
  // adaptive_motion_search breaks encoder multi-thread tests.
  // The values in x->pred_mv[] differ for single and multi-thread cases.
  // See aomedia:1778.
  // sf->adaptive_motion_search = 1;
  sf->recode_loop = ALLOW_RECODE_KFARFGF;
  sf->use_transform_domain_distortion = not_boosted ? 2 : 1;
  sf->use_accurate_subpel_search = USE_2_TAPS;
  sf->adaptive_rd_thresh = 2;
  if (cpi->oxcf.enable_smooth_interintra) {
    // Only touched when smooth inter-intra is enabled in the config; it stays
    // on for boosted frames and frames refreshing the bwd / alt2 reference.
    sf->disable_smooth_interintra = !is_boosted_arf2_bwd_type;
  }
  sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
  sf->gm_search_type = GM_DISABLE_SEARCH;
  sf->prune_comp_search_by_single_result = 2;
  sf->prune_motion_mode_level = not_boosted ? 3 : 2;
  sf->prune_warp_using_wmtype = 1;
  // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
  // it with cpi->sf.disable_wedge_search_var_thresh.
  sf->disable_wedge_interintra_search = 1;
  // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
  // and clean-up the speed feature
  sf->perform_best_rd_based_gating_for_chroma = 1;
  if (frame_is_intra_only(cm)) {
    sf->prune_ref_frame_for_rect_partitions = 0;
  } else {
    sf->prune_ref_frame_for_rect_partitions = boosted ? 1 : 2;
  }
  sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 3;
  sf->prune_comp_type_by_model_rd = not_boosted;
  // TODO(Venkat): Clean-up frame type dependency for
  // simple_motion_search_split_only in partition search function and set the
  // speed feature accordingly
  // TODO(Venkat): Evaluate this speed feature for speed 1 & 2
  sf->simple_motion_search_split_only = cm->allow_screen_content_tools ? 1 : 2;
  // Smooth intra stays enabled only on an intra-only frame with
  // rc.frames_to_key equal to 1.
  sf->disable_smooth_intra =
      !(frame_is_intra_only(cm) && (cpi->rc.frames_to_key == 1));

  if (speed < 4) return;

  // ---- Speed 4 ----
  sf->use_intra_txb_hash = 0;
  sf->tx_type_search.fast_intra_tx_type_search = 1;
  sf->disable_loop_restoration_chroma =
      !(boosted || cm->allow_screen_content_tools);
  sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
  sf->adaptive_pred_interp_filter = 0;
  sf->cb_pred_filter_search = 1;
  sf->adaptive_mode_search = 1;
  sf->alt_ref_search_fp = 1;
  sf->skip_sharp_interp_filter_search = 1;
  sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
  sf->adaptive_txb_search_level = not_boosted ? 3 : 2;

  if (speed < 5) return;

  // ---- Speed 5 ----
  sf->recode_loop = ALLOW_RECODE_KFMAXBW;
  sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
  sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
  sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
  sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
  sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
  sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
  sf->tx_size_search_method = USE_LARGESTALL;
  sf->mv.search_method = BIGDIA;
  sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
  sf->adaptive_rd_thresh = 4;
  if (cm->current_frame.frame_type == KEY_FRAME) {
    sf->mode_search_skip_flags = 0;
  } else {
    sf->mode_search_skip_flags =
        FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
        FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
        FLAG_EARLY_TERMINATE;
  }
  sf->disable_filter_search_var_thresh = 200;
  sf->use_fast_coef_costing = 1;
  sf->partition_search_breakout_rate_thr = 300;
  sf->use_transform_domain_distortion = 2;

  if (speed < 6) return;

  // ---- Speed 6 ----
  sf->optimize_coefficients = NO_TRELLIS_OPT;
  sf->mv.search_method = HEX;
  sf->disable_filter_search_var_thresh = 500;
  for (int tx = 0; tx < TX_SIZES; ++tx) {
    sf->intra_y_mode_mask[tx] = INTRA_DC;
    sf->intra_uv_mode_mask[tx] = UV_INTRA_DC_CFL;
  }
  sf->partition_search_breakout_rate_thr = 500;
  sf->mv.reduce_first_step_size = 1;
  sf->simple_model_rd_from_var = 1;

  if (speed < 7) return;

  // ---- Speed 7 ----
  sf->default_max_partition_size = BLOCK_32X32;
  sf->default_min_partition_size = BLOCK_8X8;
  sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
  sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
  sf->frame_parameter_update = 0;
  sf->mv.search_method = FAST_HEX;
  sf->partition_search_type = REFERENCE_PARTITION;
  sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
  // TODO(any): evaluate adaptive_mode_search=1 for speed 7 & 8
  sf->adaptive_mode_search = 2;

  if (speed < 8) return;

  // ---- Speed 8 ----
  sf->mv.search_method = FAST_DIAMOND;
  sf->mv.subpel_force_stop = HALF_PEL;
  sf->lpf_pick = LPF_PICK_FROM_Q;
}
415
416 // TODO(kyslov): now this is very similar to
417 // set_good_speed_features_framesize_independent
418 // except it sets non-rd flag on speed8. This function will likely
419 // be modified in the future with RT-specific speed features
set_rt_speed_features_framesize_independent(AV1_COMP * cpi,SPEED_FEATURES * sf,int speed)420 static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
421 SPEED_FEATURES *sf,
422 int speed) {
423 AV1_COMMON *const cm = &cpi->common;
424 const int boosted = frame_is_boosted(cpi);
425
426 // Speed 0 for all speed features that give neutral coding performance change.
427 sf->reduce_inter_modes = 1;
428 sf->prune_ext_partition_types_search_level = 1;
429 sf->ml_prune_rect_partition = 1;
430 sf->ml_prune_ab_partition = 1;
431 sf->ml_prune_4_partition = 1;
432 sf->adaptive_txb_search_level = 1;
433 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
434 sf->model_based_prune_tx_search_level = 1;
435 sf->model_based_post_interp_filter_breakout = 1;
436 sf->model_based_motion_mode_rd_breakout = 1;
437
438 // TODO(debargha): Test, tweak and turn on either 1 or 2
439 sf->inter_mode_rd_model_estimation = 0;
440 sf->inter_mode_rd_model_estimation_adaptive = 0;
441 sf->two_loop_comp_search = 0;
442
443 sf->prune_ref_frame_for_rect_partitions = !boosted;
444 sf->less_rectangular_check_level = 1;
445 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
446 sf->gm_disable_recode = 1;
447 sf->use_fast_interpolation_filter_search = 1;
448 sf->intra_tx_size_search_init_depth_sqr = 1;
449 sf->intra_angle_estimation = 1;
450 sf->selective_ref_frame = 1;
451 sf->prune_wedge_pred_diff_based = 1;
452 sf->disable_wedge_search_var_thresh = 0;
453 sf->disable_wedge_search_edge_thresh = 0;
454 sf->prune_motion_mode_level = 1;
455 sf->cb_pred_filter_search = 0;
456 sf->use_nonrd_pick_mode = 0;
457 sf->use_real_time_ref_set = 0;
458
459 if (speed >= 1) {
460 sf->gm_erroradv_type = GM_ERRORADV_TR_1;
461 sf->selective_ref_frame = 2;
462
463 sf->intra_tx_size_search_init_depth_rect = 1;
464 sf->tx_size_search_lgr_block = 1;
465 sf->prune_ext_partition_types_search_level = 2;
466 sf->skip_repeat_interpolation_filter_search = 1;
467 sf->tx_type_search.skip_tx_search = 1;
468 sf->tx_type_search.ml_tx_split_thresh = 40;
469 sf->model_based_prune_tx_search_level = 0;
470 sf->adaptive_txb_search_level = 2;
471 sf->use_intra_txb_hash = 1;
472 sf->optimize_b_precheck = 1;
473 sf->dual_sgr_penalty_level = 1;
474 sf->use_accurate_subpel_search = USE_4_TAPS;
475 sf->reuse_inter_intra_mode = 1;
476 sf->prune_comp_search_by_single_result = 1;
477 sf->skip_repeated_newmv = 1;
478 sf->obmc_full_pixel_search_level = 1;
479 // TODO(anyone): Following speed feature will be further explored to
480 // identify the appropriate tradeoff between encoder performance and its
481 // speed.
482 sf->prune_single_motion_modes_by_simple_trans = 1;
483
484 sf->simple_motion_search_prune_rect = 1;
485
486 sf->disable_wedge_search_var_thresh = 0;
487 sf->disable_wedge_search_edge_thresh = 0;
488 sf->prune_comp_type_by_comp_avg = 1;
489 sf->prune_motion_mode_level = 2;
490 sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
491 sf->cb_pred_filter_search = 1;
492 sf->use_transform_domain_distortion = boosted ? 0 : 1;
493 }
494
495 if (speed >= 2) {
496 sf->gm_erroradv_type = GM_ERRORADV_TR_2;
497
498 sf->selective_ref_frame = 3;
499 sf->inter_tx_size_search_init_depth_rect = 1;
500 sf->inter_tx_size_search_init_depth_sqr = 1;
501 sf->fast_cdef_search = 1;
502
503 sf->adaptive_rd_thresh = 1;
504 sf->mv.auto_mv_step_size = 1;
505 sf->mv.subpel_iters_per_step = 1;
506 sf->disable_filter_search_var_thresh = 100;
507 sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
508
509 sf->partition_search_breakout_rate_thr = 80;
510 sf->allow_partition_search_skip = 1;
511 sf->disable_wedge_search_var_thresh = 100;
512 sf->disable_wedge_search_edge_thresh = 0;
513 sf->fast_wedge_sign_estimate = 1;
514 sf->disable_dual_filter = 1;
515 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
516 sf->prune_comp_type_by_comp_avg = 2;
517 sf->cb_pred_filter_search = 0;
518 sf->adaptive_interp_filter_search = 1;
519 }
520
521 if (speed >= 3) {
522 sf->selective_ref_frame = 4;
523 sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
524 sf->less_rectangular_check_level = 2;
525 sf->adaptive_pred_interp_filter = 1;
526 // adaptive_motion_search breaks encoder multi-thread tests.
527 // The values in x->pred_mv[] differ for single and multi-thread cases.
528 // See aomedia:1778.
529 // sf->adaptive_motion_search = 1;
530 sf->recode_loop = ALLOW_RECODE_KFARFGF;
531 sf->use_transform_domain_distortion = 1;
532 sf->use_accurate_subpel_search = USE_2_TAPS;
533 sf->adaptive_rd_thresh = 2;
534 sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
535 sf->gm_search_type = GM_DISABLE_SEARCH;
536 sf->prune_comp_search_by_single_result = 2;
537 sf->prune_motion_mode_level = boosted ? 2 : 3;
538 sf->prune_warp_using_wmtype = 1;
539 // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
540 // it with cpi->sf.disable_wedge_search_var_thresh.
541 sf->disable_wedge_interintra_search = 1;
542 }
543
544 if (speed >= 4) {
545 sf->use_intra_txb_hash = 0;
546 sf->use_mb_rd_hash = 0;
547 sf->tx_type_search.fast_intra_tx_type_search = 1;
548 sf->tx_type_search.fast_inter_tx_type_search = 1;
549 sf->tx_size_search_method =
550 frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
551 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
552 sf->adaptive_pred_interp_filter = 0;
553 sf->adaptive_mode_search = 1;
554 sf->alt_ref_search_fp = 1;
555 sf->skip_sharp_interp_filter_search = 1;
556 }
557
558 if (speed >= 5) {
559 sf->recode_loop = ALLOW_RECODE_KFMAXBW;
560 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
561 sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
562 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
563 sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
564 sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
565 sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
566 sf->tx_size_search_method = USE_LARGESTALL;
567 sf->mv.search_method = BIGDIA;
568 sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
569 sf->adaptive_rd_thresh = 4;
570 sf->mode_search_skip_flags =
571 (cm->current_frame.frame_type == KEY_FRAME)
572 ? 0
573 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
574 FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
575 FLAG_EARLY_TERMINATE;
576 sf->disable_filter_search_var_thresh = 200;
577 sf->use_fast_coef_costing = 1;
578 sf->partition_search_breakout_rate_thr = 300;
579 sf->use_transform_domain_distortion = 2;
580 }
581
582 if (speed >= 6) {
583 int i;
584 sf->optimize_coefficients = NO_TRELLIS_OPT;
585 sf->mv.search_method = HEX;
586 sf->disable_filter_search_var_thresh = 500;
587 for (i = 0; i < TX_SIZES; ++i) {
588 sf->intra_y_mode_mask[i] = INTRA_DC;
589 sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
590 }
591 sf->partition_search_breakout_rate_thr = 500;
592 sf->mv.reduce_first_step_size = 1;
593 sf->simple_model_rd_from_var = 1;
594 }
595 if (speed >= 7) {
596 sf->default_max_partition_size = BLOCK_32X32;
597 sf->default_min_partition_size = BLOCK_8X8;
598 sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
599 sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
600 sf->frame_parameter_update = 0;
601 sf->mv.search_method = FAST_HEX;
602 sf->partition_search_type = REFERENCE_PARTITION;
603 sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
604 }
605 if (speed >= 8) {
606 sf->mv.search_method = FAST_DIAMOND;
607 sf->lpf_pick = LPF_PICK_FROM_Q;
608 sf->default_max_partition_size = BLOCK_128X128;
609 sf->default_min_partition_size = BLOCK_8X8;
610 sf->partition_search_type = VAR_BASED_PARTITION;
611 sf->use_real_time_ref_set = 1;
612 // Can't use LARGEST TX mode with pre-calculated partition
613 // and disabled TX64
614 if (!cpi->oxcf.enable_tx64) sf->tx_size_search_method = USE_FAST_RD;
615 sf->use_nonrd_pick_mode = 1;
616 sf->inter_mode_rd_model_estimation = 2;
617 }
618 }
619
// Applies the speed features that depend on frame size to cpi->sf.
//
// Frame-size-dependent settings are only applied in GOOD mode. Independently
// of the mode, the fractional motion vector search callback is overridden when
// the encoder config requests one of the motion vector unit test modes.
//
//   cpi   - encoder instance; cpi->sf and possibly
//           cpi->find_fractional_mv_step are updated.
//   speed - requested speed level.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  if (oxcf->mode == GOOD) {
    set_good_speed_feature_framesize_dependent(cpi, &cpi->sf, speed);
  }

  // This is only used in motion vector unit test.
  switch (oxcf->motion_vector_unit_test) {
    case 1: cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv; break;
    case 2: cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv; break;
    default: break;  // Leave the current callback untouched.
  }
}
634
av1_set_speed_features_framesize_independent(AV1_COMP * cpi,int speed)635 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
636 AV1_COMMON *const cm = &cpi->common;
637 SPEED_FEATURES *const sf = &cpi->sf;
638 MACROBLOCK *const x = &cpi->td.mb;
639 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
640 int i;
641
642 // best quality defaults
643 sf->frame_parameter_update = 1;
644 sf->mv.search_method = NSTEP;
645 sf->recode_loop = ALLOW_RECODE;
646 sf->mv.subpel_search_method = SUBPEL_TREE;
647 sf->mv.subpel_iters_per_step = 2;
648 sf->mv.subpel_force_stop = EIGHTH_PEL;
649 if (cpi->oxcf.disable_trellis_quant == 3) {
650 sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
651 ? NO_ESTIMATE_YRD_TRELLIS_OPT
652 : NO_TRELLIS_OPT;
653 } else if (cpi->oxcf.disable_trellis_quant == 2) {
654 sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
655 ? FINAL_PASS_TRELLIS_OPT
656 : NO_TRELLIS_OPT;
657 } else if (cpi->oxcf.disable_trellis_quant == 0) {
658 if (is_lossless_requested(&cpi->oxcf))
659 sf->optimize_coefficients = NO_TRELLIS_OPT;
660 else
661 sf->optimize_coefficients = FULL_TRELLIS_OPT;
662 } else if (cpi->oxcf.disable_trellis_quant == 1) {
663 sf->optimize_coefficients = NO_TRELLIS_OPT;
664 } else {
665 assert(0 && "Invalid disable_trellis_quant value");
666 }
667 sf->gm_erroradv_type = GM_ERRORADV_TR_0;
668 sf->mv.reduce_first_step_size = 0;
669 sf->mv.auto_mv_step_size = 0;
670 sf->comp_inter_joint_search_thresh = BLOCK_4X4;
671 sf->adaptive_rd_thresh = 0;
672 // TODO(sarahparker) Pair this with a speed setting once experiments are done
673 sf->trellis_eob_fast = 0;
674 sf->tx_size_search_method = cpi->oxcf.tx_size_search_method;
675 sf->inter_tx_size_search_init_depth_sqr = 0;
676 sf->inter_tx_size_search_init_depth_rect = 0;
677 sf->intra_tx_size_search_init_depth_rect = 0;
678 sf->intra_tx_size_search_init_depth_sqr = 0;
679 sf->tx_size_search_lgr_block = 0;
680 sf->model_based_prune_tx_search_level = 0;
681 sf->model_based_post_interp_filter_breakout = 0;
682 sf->model_based_motion_mode_rd_breakout = 0;
683 sf->reduce_inter_modes = 0;
684 sf->selective_ref_gm = 1;
685 sf->adaptive_motion_search = 0;
686 sf->adaptive_pred_interp_filter = 0;
687 sf->adaptive_mode_search = 0;
688 sf->alt_ref_search_fp = 0;
689 sf->partition_search_type = SEARCH_PARTITION;
690 sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE;
691 sf->tx_type_search.ml_tx_split_thresh = 30;
692 sf->tx_type_search.use_skip_flag_prediction = 1;
693 sf->tx_type_search.fast_intra_tx_type_search = 0;
694 sf->tx_type_search.fast_inter_tx_type_search = 0;
695 sf->tx_type_search.skip_tx_search = 0;
696 sf->selective_ref_frame = 0;
697 sf->less_rectangular_check_level = 0;
698 sf->use_square_partition_only_threshold = BLOCK_128X128;
699 sf->prune_ref_frame_for_rect_partitions = 0;
700 sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
701 sf->auto_min_partition_based_on_simple_motion = 0;
702 sf->rd_auto_partition_min_limit = BLOCK_4X4;
703 sf->default_max_partition_size = BLOCK_LARGEST;
704 sf->default_min_partition_size = BLOCK_4X4;
705 sf->adjust_partitioning_from_last_frame = 0;
706 sf->mode_search_skip_flags = 0;
707 sf->disable_filter_search_var_thresh = 0;
708 sf->allow_partition_search_skip = 0;
709 sf->use_accurate_subpel_search = USE_8_TAPS;
710 sf->disable_wedge_search_edge_thresh = 0;
711 sf->use_first_partition_pass_interintra_stats = 0;
712 sf->disable_wedge_search_var_thresh = 0;
713 sf->disable_loop_restoration_chroma = 0;
714 sf->fast_wedge_sign_estimate = 0;
715 sf->prune_wedge_pred_diff_based = 0;
716 sf->drop_ref = 0;
717 sf->skip_intra_in_interframe = 1;
718 sf->txb_split_cap = 1;
719 sf->adaptive_txb_search_level = 0;
720 sf->two_pass_partition_search = 0;
721 sf->firstpass_simple_motion_search_early_term = 0;
722 sf->use_intra_txb_hash = 0;
723 sf->use_inter_txb_hash = 1;
724 sf->use_mb_rd_hash = 1;
725 sf->optimize_b_precheck = 0;
726 sf->two_loop_comp_search = 1;
727 sf->second_loop_comp_fast_tx_search = 0;
728 sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
729 sf->reuse_inter_intra_mode = 0;
730 sf->intra_angle_estimation = 0;
731 sf->skip_obmc_in_uniform_mv_field = 0;
732 sf->skip_wm_in_uniform_mv_field = 0;
733 sf->adaptive_interp_filter_search = 0;
734
735 for (i = 0; i < TX_SIZES; i++) {
736 sf->intra_y_mode_mask[i] = INTRA_ALL;
737 sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
738 }
739 sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
740 sf->use_fast_coef_costing = 0;
741 sf->max_intra_bsize = BLOCK_LARGEST;
742 // This setting only takes effect when partition_search_type is set
743 // to FIXED_PARTITION.
744 sf->always_this_block_size = BLOCK_16X16;
745 // Recode loop tolerance %.
746 sf->recode_tolerance = 25;
747 sf->partition_search_breakout_dist_thr = 0;
748 sf->partition_search_breakout_rate_thr = 0;
749 sf->simple_model_rd_from_var = 0;
750 sf->prune_ext_partition_types_search_level = 0;
751 sf->ml_prune_rect_partition = 0;
752 sf->ml_prune_ab_partition = 0;
753 sf->ml_prune_4_partition = 0;
754 sf->fast_cdef_search = 0;
755 for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
756 sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled.
757 }
758 sf->simple_motion_search_split_only = 0;
759 sf->simple_motion_search_prune_rect = 0;
760 sf->simple_motion_search_early_term_none = 0;
761
762 // Set this at the appropriate speed levels
763 sf->use_transform_domain_distortion = 0;
764 sf->gm_search_type = GM_FULL_SEARCH;
765 sf->gm_disable_recode = 0;
766 sf->use_fast_interpolation_filter_search = 0;
767 sf->disable_dual_filter = 0;
768 sf->skip_repeat_interpolation_filter_search = 0;
769 sf->use_hash_based_trellis = 0;
770 sf->prune_comp_search_by_single_result = 0;
771 sf->skip_repeated_newmv = 0;
772 sf->prune_single_motion_modes_by_simple_trans = 0;
773
774 // Set decoder side speed feature to use less dual sgr modes
775 sf->dual_sgr_penalty_level = 0;
776
777 // TODO(angiebird, debargha): Re-evaluate the impact of
778 // inter_mode_rd_model_estimation in conjunction with
779 // model_based_motion_mode_rd_breakout
780 sf->inter_mode_rd_model_estimation = 0;
781 sf->inter_mode_rd_model_estimation_adaptive = 0;
782
783 sf->obmc_full_pixel_search_level = 0;
784 sf->skip_sharp_interp_filter_search = 0;
785 sf->prune_comp_type_by_comp_avg = 0;
786 sf->disable_interinter_wedge_newmv_search = 0;
787 sf->disable_smooth_interintra = 0;
788 sf->prune_motion_mode_level = 0;
789 sf->prune_warp_using_wmtype = 0;
790 sf->disable_wedge_interintra_search = 0;
791 sf->perform_coeff_opt = 0;
792 sf->prune_comp_type_by_model_rd = 0;
793 sf->disable_smooth_intra = 0;
794 sf->perform_best_rd_based_gating_for_chroma = 0;
795
796 if (oxcf->mode == GOOD)
797 set_good_speed_features_framesize_independent(cpi, sf, speed);
798 else if (oxcf->mode == REALTIME)
799 set_rt_speed_features_framesize_independent(cpi, sf, speed);
800
801 if (!cpi->seq_params_locked) {
802 cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;
803 }
804
805 // sf->partition_search_breakout_dist_thr is set assuming max 64x64
806 // blocks. Normalise this if the blocks are bigger.
807 if (MAX_SB_SIZE_LOG2 > 6) {
808 sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
809 }
810
811 cpi->diamond_search_sad = av1_diamond_search_sad;
812
813 sf->allow_exhaustive_searches = 1;
814
815 const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
816 if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
817 sf->exhaustive_searches_thresh = (1 << 24);
818 else
819 sf->exhaustive_searches_thresh = (1 << 25);
820 sf->max_exaustive_pct = good_quality_max_mesh_pct[mesh_speed];
821 if (mesh_speed > 0)
822 sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
823
824 for (i = 0; i < MAX_MESH_STEP; ++i) {
825 sf->mesh_patterns[i].range =
826 good_quality_mesh_patterns[mesh_speed][i].range;
827 sf->mesh_patterns[i].interval =
828 good_quality_mesh_patterns[mesh_speed][i].interval;
829 }
830 if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
831 (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
832 cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
833 for (i = 0; i < MAX_MESH_STEP; ++i) {
834 sf->mesh_patterns[i].range = intrabc_mesh_patterns[mesh_speed][i].range;
835 sf->mesh_patterns[i].interval =
836 intrabc_mesh_patterns[mesh_speed][i].interval;
837 }
838 sf->max_exaustive_pct = intrabc_max_mesh_pct[mesh_speed];
839 }
840
841 // Slow quant, dct and trellis not worthwhile for first pass
842 // so make sure they are always turned off.
843 if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT;
844
845 // No recode or trellis for 1 pass.
846 if (oxcf->pass == 0) {
847 sf->recode_loop = DISALLOW_RECODE;
848 sf->optimize_coefficients = NO_TRELLIS_OPT;
849 }
850 // FIXME: trellis not very efficient for quantization matrices
851 if (oxcf->using_qm) sf->optimize_coefficients = NO_TRELLIS_OPT;
852
853 if (sf->mv.subpel_search_method == SUBPEL_TREE) {
854 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
855 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
856 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned;
857 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
858 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more;
859 } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
860 cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore;
861 }
862
863 x->min_partition_size = sf->default_min_partition_size;
864 x->max_partition_size = sf->default_max_partition_size;
865
866 // This is only used in motion vector unit test.
867 if (cpi->oxcf.motion_vector_unit_test == 1)
868 cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
869 else if (cpi->oxcf.motion_vector_unit_test == 2)
870 cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
871 cpi->max_comp_type_rd_threshold_mul =
872 comp_type_rd_threshold_mul[sf->prune_comp_type_by_comp_avg];
873 cpi->max_comp_type_rd_threshold_div =
874 comp_type_rd_threshold_div[sf->prune_comp_type_by_comp_avg];
875 const int tx_domain_speed = AOMMIN(speed, MAX_TX_DOMAIN_EVAL_SPEED);
876 cpi->tx_domain_dist_threshold = tx_domain_dist_thresholds[tx_domain_speed];
877
878 // assert ensures that coeff_opt_dist_thresholds is accessed correctly
879 assert(cpi->sf.perform_coeff_opt >= 0 && cpi->sf.perform_coeff_opt < 5);
880 cpi->coeff_opt_dist_threshold =
881 coeff_opt_dist_thresholds[cpi->sf.perform_coeff_opt];
882
883 #if CONFIG_DIST_8X8
884 if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0;
885
886 if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8;
887 #endif // CONFIG_DIST_8X8
888 if (cpi->oxcf.row_mt == 1 && (cpi->oxcf.max_threads > 1)) {
889 sf->adaptive_rd_thresh = 0;
890 if (sf->inter_mode_rd_model_estimation == 1) {
891 sf->inter_mode_rd_model_estimation = 0;
892 sf->inter_mode_rd_model_estimation_adaptive = 0;
893 }
894 }
895 }
896