1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73
74 #define LAST_NEW_MV_INDEX 6
75
// Mode_threshold multiplication factor table for
// prune_inter_modes_if_skippable.
// The values are kept in Q12 format and the equation used to derive them is
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
// i.e. the factor decreases linearly from 2.5 (qindex 0) to 1.0 (qindex MAXQ),
// so pruning is more conservative at low qindex. One entry per qindex.
#define MODE_THRESH_QBITS 12
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106
// Default evaluation order of the prediction modes in the inter mode search.
// Ordering: single-ref NEAREST, then NEW, NEAR, GLOBAL for each reference;
// then compound NEAREST_NEAREST pairs; then the full compound mode set per
// reference pair; intra modes come last.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV, one per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST for every supported reference pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped per reference-frame pair:
  // LAST+BWDREF.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // LAST+ALTREF.
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // LAST2+ALTREF.
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // LAST3+ALTREF.
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // GOLDEN+ALTREF.
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // LAST2+BWDREF.
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // LAST3+BWDREF.
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // GOLDEN+BWDREF.
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // LAST+ALTREF2.
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // LAST2+ALTREF2.
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // LAST3+ALTREF2.
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // GOLDEN+ALTREF2.
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Unidirectional compound: LAST+LAST2.
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // Unidirectional compound: LAST+LAST3.
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // Unidirectional compound: LAST+GOLDEN.
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // Unidirectional compound: BWDREF+ALTREF.
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, searched after all inter modes.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
};
299
300 /*!\cond */
// Result record for one single-reference inter mode evaluation.
typedef struct SingleInterModeState {
  int64_t rd;                    // rd cost of this mode/ref combination
  MV_REFERENCE_FRAME ref_frame;  // reference frame the rd value belongs to
  int valid;                     // nonzero once this entry has been filled in
} SingleInterModeState;
306
// Running state of the inter mode search: best-so-far results plus caches
// used to prune later mode evaluations.
typedef struct InterModeSearchState {
  int64_t best_rd;          // best overall rd found so far
  int64_t best_skip_rd[2];  // best rd with skip, indexed by skip context
  MB_MODE_INFO best_mbmode;
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Model-estimated rd per mode/ref_mv/ref combination.
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;
  RD_STATS best_y_rdcost;
} InterModeSearchState;
347 /*!\endcond */
348
// Reset every per-block-size inter mode rd model in the tile to its
// untrained state so that training starts from scratch.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361
// Estimate the residue rate and distortion for a block from its sse using
// the tile's trained linear rd model. Returns 1 if the model for this block
// size is ready and estimates were written, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    // Below the mean distortion the model predicts essentially free residue.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    // Slope too close to zero: rate estimate would blow up.
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double est_residue_cost_dbl = (sse - md->dist_mean) / est_ld;
    if (est_residue_cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
    }
  }
  if (*est_residue_cost <= 0) {
    // Zero-rate estimate implies the residue is not coded: keep full sse.
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
// (Re)fit the per-block-size linear rd models from the samples gathered so
// far. A model is first fit after 200 samples; once ready it is refreshed
// every 64 samples using an exponential blend of old and new statistics.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    // Wait until enough fresh samples have accumulated.
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    }

    if (md->ready == 0) {
      // First fit: plain sample means.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Refresh: blend previous means (weight 3) with the new batch means.
      const double factor = 3;
      md->dist_mean =
          (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
      md->ld_mean =
          (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
      md->sse_mean =
          (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
          (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
          (factor + 1);
    }

    // Least-squares fit of ld = a * sse + b from the accumulated moments.
    const double my = md->ld_mean;
    const double mx = md->sse_mean;
    const double dx = sqrt(md->sse_sse_mean);
    const double dxy = md->sse_ld_mean;

    md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    md->b = my - md->a * mx;
    md->ready = 1;

    // Start accumulating the next batch from zero.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447 BLOCK_SIZE bsize, int64_t sse,
448 int64_t dist, int residue_cost) {
449 if (residue_cost == 0 || sse == dist) return;
450 const int block_idx = inter_mode_data_block_idx(bsize);
451 if (block_idx == -1) return;
452 InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453 if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454 const double ld = (sse - dist) * 1. / residue_cost;
455 ++rd_model->num;
456 rd_model->dist_sum += dist;
457 rd_model->ld_sum += ld;
458 rd_model->sse_sum += sse;
459 rd_model->sse_sse_sum += (double)sse * (double)sse;
460 rd_model->sse_ld_sum += sse * ld;
461 }
462 }
463
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465 int mode_rate, int64_t sse,
466 int64_t rd, RD_STATS *rd_cost,
467 RD_STATS *rd_cost_y,
468 RD_STATS *rd_cost_uv,
469 const MB_MODE_INFO *mbmi) {
470 const int num = inter_modes_info->num;
471 assert(num < MAX_INTER_MODES);
472 inter_modes_info->mbmi_arr[num] = *mbmi;
473 inter_modes_info->mode_rate_arr[num] = mode_rate;
474 inter_modes_info->sse_arr[num] = sse;
475 inter_modes_info->est_rd_arr[num] = rd;
476 inter_modes_info->rd_cost_arr[num] = *rd_cost;
477 inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478 inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479 ++inter_modes_info->num;
480 }
481
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484 // To avoid inconsistency in qsort() ordering when two elements are equal,
485 // using idx as tie breaker. Refer aomedia:2928
486 if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487 return 0;
488 else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489 return 1;
490 else
491 return -1;
492 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493 return 1;
494 } else {
495 return -1;
496 }
497 }
498
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500 const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501 if (inter_modes_info->num == 0) {
502 return;
503 }
504 for (int i = 0; i < inter_modes_info->num; ++i) {
505 rd_idx_pair_arr[i].idx = i;
506 rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507 }
508 qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509 compare_rd_idx_pair);
510 }
511
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)514 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515 int width, int height, float *hcorr,
516 float *vcorr) {
517 // The following notation is used:
518 // x - current pixel
519 // y - left neighbor pixel
520 // z - top neighbor pixel
521 int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522 int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523 int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524
525 // First, process horizontal correlation on just the first row
526 x_sum += diff[0];
527 x2_sum += diff[0] * diff[0];
528 x_firstrow += diff[0];
529 x2_firstrow += diff[0] * diff[0];
530 for (int j = 1; j < width; ++j) {
531 const int16_t x = diff[j];
532 const int16_t y = diff[j - 1];
533 x_sum += x;
534 x_firstrow += x;
535 x2_sum += x * x;
536 x2_firstrow += x * x;
537 xy_sum += x * y;
538 }
539
540 // Process vertical correlation in the first column
541 x_firstcol += diff[0];
542 x2_firstcol += diff[0] * diff[0];
543 for (int i = 1; i < height; ++i) {
544 const int16_t x = diff[i * stride];
545 const int16_t z = diff[(i - 1) * stride];
546 x_sum += x;
547 x_firstcol += x;
548 x2_sum += x * x;
549 x2_firstcol += x * x;
550 xz_sum += x * z;
551 }
552
553 // Now process horiz and vert correlation through the rest unit
554 for (int i = 1; i < height; ++i) {
555 for (int j = 1; j < width; ++j) {
556 const int16_t x = diff[i * stride + j];
557 const int16_t y = diff[i * stride + j - 1];
558 const int16_t z = diff[(i - 1) * stride + j];
559 x_sum += x;
560 x2_sum += x * x;
561 xy_sum += x * y;
562 xz_sum += x * z;
563 }
564 }
565
566 for (int j = 0; j < width; ++j) {
567 x_finalrow += diff[(height - 1) * stride + j];
568 x2_finalrow +=
569 diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570 }
571 for (int i = 0; i < height; ++i) {
572 x_finalcol += diff[i * stride + width - 1];
573 x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574 }
575
576 int64_t xhor_sum = x_sum - x_finalcol;
577 int64_t xver_sum = x_sum - x_finalrow;
578 int64_t y_sum = x_sum - x_firstcol;
579 int64_t z_sum = x_sum - x_firstrow;
580 int64_t x2hor_sum = x2_sum - x2_finalcol;
581 int64_t x2ver_sum = x2_sum - x2_finalrow;
582 int64_t y2_sum = x2_sum - x2_firstcol;
583 int64_t z2_sum = x2_sum - x2_firstrow;
584
585 const float num_hor = (float)(height * (width - 1));
586 const float num_ver = (float)((height - 1) * width);
587
588 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590
591 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593
594 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596
597 if (xhor_var_n > 0 && y_var_n > 0) {
598 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599 *hcorr = *hcorr < 0 ? 0 : *hcorr;
600 } else {
601 *hcorr = 1.0;
602 }
603 if (xver_var_n > 0 && z_var_n > 0) {
604 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605 *vcorr = *vcorr < 0 ? 0 : *vcorr;
606 } else {
607 *vcorr = 1.0;
608 }
609 }
610
// Sum the source-vs-prediction sse over all coded planes of the current
// block (chroma planes skipped when the block carries no chroma), scaled
// up by 16 (<< 4). If sse_y is non-NULL it receives the luma-plane sse
// (unscaled).
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
                       int64_t *sse_y) {
  const AV1_COMMON *cm = &cpi->common;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  int64_t total_sse = 0;

  for (int plane = 0; plane < num_planes; ++plane) {
    if (plane && !xd->is_chroma_ref) break;
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    unsigned int plane_sse;

    cpi->ppi->fn_ptr[plane_bsize].vf(p->src.buf, p->src.stride, pd->dst.buf,
                                     pd->dst.stride, &plane_sse);
    if (plane == 0 && sse_y) *sse_y = plane_sse;
    total_sse += plane_sse;
  }
  return total_sse << 4;
}
634
// Compute the sum of squared differences between quantized-then-dequantized
// coefficients and the original transform coefficients. Also returns the
// sum of squared original coefficients via ssz.
// Fix: widen the products to int64_t before multiplying. Transform
// coefficients can be large enough that diff * diff (and
// coeff[i] * coeff[i]) computed in int risks signed-integer overflow, which
// is undefined behavior; this matches av1_highbd_block_error_c, which
// already accumulates in 64 bits.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
649
// Low-precision variant of av1_block_error: sum of squared differences
// between 16-bit coefficient arrays (no ssz output).
// Fix: accumulate the squared difference in 64 bits. With int16_t inputs,
// diff can reach 65534, so diff * diff evaluated in int can exceed INT_MAX
// (signed overflow is undefined behavior).
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int32_t diff = (int32_t)coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }

  return error;
}
661
662 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: like av1_block_error but the accumulated sums
// are rounded and scaled down by 2 * (bd - 8) bits so results are comparable
// across bit depths.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0, sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  // Round-to-nearest downshift back to 8-bit scale.
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
683 #endif
684
// Returns 1 when the given diagonal intra mode may be skipped because the
// best intra mode found so far is not one of its two neighboring directions.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701
// Rate cost (in bits, scaled) of signalling the given inter prediction mode
// under the given mode context. Compound modes use the joint compound-mode
// table; single modes walk the NEWMV / GLOBALMV / NEAREST-vs-NEAR tree.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // First decision: NEWMV vs everything else.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];
  int mode_cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  // Second decision: GLOBALMV vs the NEAREST/NEAR group.
  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return mode_cost + mode_costs->zeromv_mode_cost[zeromv_ctx][0];
  }
  mode_cost += mode_costs->zeromv_mode_cost[zeromv_ctx][1];

  // Final decision: NEARESTMV vs NEARMV.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  mode_cost += mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
  return mode_cost;
}
732
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734 int ref_idx) {
735 return ref_idx ? compound_ref1_mode(this_mode)
736 : compound_ref0_mode(this_mode);
737 }
738
// Estimate the signalling cost of every single and compound reference frame
// choice for the current block, filling ref_costs_single[ref] and
// ref_costs_comp[ref0][ref1]. When the segment pins the reference frame,
// all costs are zero since no reference bits are coded.
static AOM_INLINE void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Segment dictates the reference frame: nothing is signalled.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter choice first pays the intra/inter flag cost.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional compound: forward refs additionally pay the
      // comp-ref-type flag; backward refs are costed only below.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction disabled for this frame: fill placeholder costs.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
900
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int skippable)901 static AOM_INLINE void store_coding_context(
902 #if CONFIG_INTERNAL_STATS
903 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
904 #else
905 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
906 #endif // CONFIG_INTERNAL_STATS
907 int skippable) {
908 MACROBLOCKD *const xd = &x->e_mbd;
909
910 // Take a snapshot of the coding context so it can be
911 // restored if we decide to encode this way
912 ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
913 ctx->skippable = skippable;
914 #if CONFIG_INTERNAL_STATS
915 ctx->best_mode_index = mode_index;
916 #endif // CONFIG_INTERNAL_STATS
917 ctx->mic = *xd->mi[0];
918 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
919 av1_ref_frame_type(xd->mi[0]->ref_frame));
920 }
921
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])922 static AOM_INLINE void setup_buffer_ref_mvs_inter(
923 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
924 BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
925 const AV1_COMMON *cm = &cpi->common;
926 const int num_planes = av1_num_planes(cm);
927 const YV12_BUFFER_CONFIG *scaled_ref_frame =
928 av1_get_scaled_ref_frame(cpi, ref_frame);
929 MACROBLOCKD *const xd = &x->e_mbd;
930 MB_MODE_INFO *const mbmi = xd->mi[0];
931 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
932 const struct scale_factors *const sf =
933 get_ref_scale_factors_const(cm, ref_frame);
934 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
935 assert(yv12 != NULL);
936
937 if (scaled_ref_frame) {
938 // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
939 // support scaling.
940 av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
941 num_planes);
942 } else {
943 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
944 }
945
946 // Gets an initial list of candidate vectors from neighbours and orders them
947 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
948 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
949 mbmi_ext->mode_context);
950 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
951 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
952 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
953 // Further refinement that is encode side only to test the top few candidates
954 // in full and choose the best as the center point for subsequent searches.
955 // The current implementation doesn't support scaling.
956 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
957 ref_frame, block_size);
958
959 // Go back to unscaled reference.
960 if (scaled_ref_frame) {
961 // We had temporarily setup pred block based on scaled reference above. Go
962 // back to unscaled reference now, for subsequent use.
963 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
964 }
965 }
966
967 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969
970 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)971 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972 const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975 xd->mb_to_bottom_edge +
976 RIGHT_BOTTOM_MARGIN };
977 clamp_mv(mv, &mv_limits);
978 }
979
980 /* If the current mode shares the same mv with other modes with higher cost,
981 * skip this mode. */
/* If the current mode shares the same mv with other modes with higher cost,
 * skip this mode. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  // Only single-reference NEARMV/GLOBALMV can collapse onto another mode's MV.
  if (ref_frames[1] > INTRA_FRAME) return 0;

  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int is_translational_gm =
      cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION;
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;

  switch (this_mode) {
    case NEARMV:
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      } else if (ref_mv_count == 1 && is_translational_gm) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
      break;
    case GLOBALMV:
      if (ref_mv_count == 0 && is_translational_gm) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      } else if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
      break;
    default: break;
  }

  if (compare_mode == MB_MODE_COUNT) return 0;

  // modelled_rd != INT64_MAX means the compare mode has already been searched.
  if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] == INT64_MAX)
    return 0;

  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
  const int compare_cost = cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
  const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

  // Only skip if this mode codes the shared MV at a higher rate than the
  // already-searched mode; reuse that mode's modelled RD in that case.
  if (this_cost > compare_cost) {
    search_state->modelled_rd[this_mode][0][ref_frames[0]] =
        search_state->modelled_rd[compare_mode][0][ref_frames[0]];
    return 1;
  }
  return 0;
}
1037
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039 const AV1_COMMON *cm,
1040 const MACROBLOCK *x) {
1041 const MACROBLOCKD *const xd = &x->e_mbd;
1042 *out_mv = in_mv;
1043 lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044 cm->features.cur_frame_force_integer_mv);
1045 clamp_mv2(&out_mv->as_mv, xd);
1046 return av1_is_fullmv_in_range(&x->mv_limits,
1047 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049
1050 // To use single newmv directly for compound modes, need to clamp the mv to the
1051 // valid mv range. Without this, encoder would generate out of range mv, and
1052 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1053 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054 int ref_idx) {
1055 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056 SubpelMvLimits mv_limits;
1057
1058 av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059 clamp_mv(&mv->as_mv, &mv_limits);
1060 }
1061
// Computes (or reuses) the NEWMV motion vector(s) for the current mode and
// accumulates their signaling rate into *rate_mv.
// - Compound modes reuse the MVs found by earlier single-reference searches
//   (args->single_newmv), clamped into the valid subpel range.
// - Single-reference modes run a fresh motion search and cache the result in
//   args->single_newmv for later compound reuse.
// Returns INT64_MAX when no valid MV is found (mode should be skipped),
// 0 on success.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Map an absent second reference (negative) to index 0 for array accesses.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Reuse the single-reference NEWMV results where available; only the
    // references whose sub-mode is NEW need an MV (and rate) here.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Both MVs are NEW: cost both against their respective ref MVs.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference uses NEW.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first reference uses NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: bound the search range using the result of a previously
    // searched ref MV candidate whose predictor is close to the current one.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only trust the bound when the closest candidate is within 16 full
      // pels (MV units are 1/8 pel, hence the << 3).
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it later.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1166
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1167 static INLINE void update_mode_start_end_index(
1168 const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1169 int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1170 int interintra_allowed, int eval_motion_mode) {
1171 *mode_index_start = (int)SIMPLE_TRANSLATION;
1172 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1173 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1174 if (!eval_motion_mode) {
1175 *mode_index_end = (int)SIMPLE_TRANSLATION;
1176 } else {
1177 // Set the start index appropriately to process motion modes other than
1178 // simple translation
1179 *mode_index_start = 1;
1180 }
1181 }
1182 if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1183 *mode_index_end = SIMPLE_TRANSLATION;
1184 }
1185
1186 /*!\brief AV1 motion mode search
1187 *
1188 * \ingroup inter_mode_search
1189 * Function to search over and determine the motion mode. It will update
1190 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1191 * WARPED_CAUSAL and determine any necessary side information for the selected
1192 * motion mode. It will also perform the full transform search, unless the
1193 * input parameter do_tx_search indicates to do an estimation of the RD rather
1194 * than an RD corresponding to a full transform search. It will return the
1195 * RD for the final motion_mode.
1196 * Do the RD search for a given inter mode and compute all information relevant
1197 * to the input mode. It will compute the best MV,
1198 * compound parameters (if the mode is a compound mode) and interpolation filter
1199 * parameters.
1200 *
1201 * \param[in] cpi Top-level encoder structure.
1202 * \param[in] tile_data Pointer to struct holding adaptive
1203 * data/contexts/models for the tile during
1204 * encoding.
1205 * \param[in] x Pointer to struct holding all the data for
1206 * the current macroblock.
1207 * \param[in] bsize Current block size.
1208 * \param[in,out] rd_stats Struct to keep track of the overall RD
1209 * information.
1210 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1211 * for only the Y plane.
1212 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1213 * for only the UV planes.
1214 * \param[in] args HandleInterModeArgs struct holding
1215 * miscellaneous arguments for inter mode
1216 * search. See the documentation for this
1217 * struct for a description of each member.
1218 * \param[in] ref_best_rd Best RD found so far for this block.
1219 * It is used for early termination of this
1220 * search if the RD exceeds this value.
1221 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1222 * best total RD for a skip mode so far, and
1223 * skip_rd[1] is the best RD for a skip mode so
1224 * far in luma. This is used as a speed feature
1225 * to skip the transform search if the computed
1226 * skip RD for the current mode is not better
1227 * than the best skip_rd so far.
1228 * \param[in,out] rate_mv The rate associated with the motion vectors.
1229 * This will be modified if a motion search is
1230 * done in the motion mode search.
1231 * \param[in,out] orig_dst A prediction buffer to hold a computed
1232 * prediction. This will eventually hold the
1233 * final prediction, and the tmp_dst info will
1234 * be copied here.
1235 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1236 * do_tx_search (see below) is 0.
1237 * \param[in] do_tx_search Parameter to indicate whether or not to do
1238 * a full transform search. This will compute
1239 * an estimated RD for the modes without the
1240 * transform search and later perform the full
1241 * transform search on the best candidates.
1242 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1243 * information to perform a full transform
1244 * search only on winning candidates searched
1245 * with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1248 * \param[out] yrd Stores the rdcost corresponding to encoding
1249 * the luma plane.
1250 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1251 * current motion mode being tested should be skipped. It returns 0 if the
1252 * motion mode search is a success.
1253 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode rate before any residual coefficient cost is added.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // A negative count marks the samples as not yet collected.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices past the last motion mode correspond to interintra variants.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Re-run the motion search for the OBMC prediction model.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Least-squares projection failed; skip WARPED_CAUSAL.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // Add the cost of signaling which motion mode is used.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Prune early if the estimated RD is clearly (more than ~25%) worse
      // than the best estimate found so far.
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record the mode so the deferred transform search can revisit it.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // If even the first (simple translation) mode fails the transform
        // search, the whole mode is invalid.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      // Luma-only RD, used by the caller for winner-mode bookkeeping.
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1641
// Estimates the RD cost of coding this block in skip mode: build the inter
// prediction for each plane and charge only the skip-mode flag plus the
// SSE-based distortion (no residual is coded). Results are written to
// rd_stats; the destination buffers are restored before returning.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t sse_accum = 0;
  int64_t rd_so_far = INT64_MAX;

  // Only the skip-mode flag is signaled.
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];

  for (int plane = 0; plane < num_planes; ++plane) {
    // Build the inter prediction for this plane only.
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  plane, plane);
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    av1_subtract_plane(x, plane_bsize, plane);

    int64_t plane_sse =
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
    // Normalize high-bit-depth SSE back to an 8-bit scale.
    if (is_cur_buf_hbd(xd))
      plane_sse = ROUND_POWER_OF_TWO(plane_sse, (xd->bd - 8) * 2);
    plane_sse <<= 4;
    sse_accum += plane_sse;
    // Early exit: once the accumulated cost already exceeds the best RD,
    // evaluating the remaining planes cannot change the outcome.
    rd_so_far = RDCOST(x->rdmult, rd_stats->rate, sse_accum);
    if (rd_so_far > best_rd) break;
  }

  rd_stats->dist = rd_stats->sse = sse_accum;
  rd_stats->rdcost = rd_so_far;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1683
// Check NEARESTMV, NEARMV and GLOBALMV ref mvs for duplicates, and skip the
// relevant mode
1686 // Note(rachelbarker): This speed feature currently does not interact correctly
1687 // with global motion. The issue is that, when global motion is used, GLOBALMV
1688 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1689 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1690 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1691 int ref_idx,
1692 const MV_REFERENCE_FRAME *ref_frame,
1693 PREDICTION_MODE single_mode) {
1694 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1695 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1696 assert(single_mode != NEWMV);
1697 if (single_mode == NEARESTMV) {
1698 return 0;
1699 } else if (single_mode == NEARMV) {
1700 // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1701 // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1702 if (ref_mv_count < 2) return 1;
1703 } else if (single_mode == GLOBALMV) {
1704 // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1705 if (ref_mv_count == 0) return 1;
1706 // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1707 else if (ref_mv_count == 1)
1708 return 0;
1709
1710 int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1711 // Check GLOBALMV is matching with any mv in ref_mv_stack
1712 for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1713 int_mv this_mv;
1714
1715 if (ref_idx == 0)
1716 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1717 else
1718 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1719
1720 if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1721 return 1;
1722 }
1723 }
1724 return 0;
1725 }
1726
// Fetches into *this_mv the motion vector that `this_mode` uses for reference
// list `ref_idx`, given candidate index `ref_mv_idx`. Returns 0 when the mode
// can be skipped because (with skip_repeated_ref_mv set) it would duplicate
// an mv already covered by another mode; returns 1 otherwise. For NEWMV the
// mv is left as INVALID_MV and is determined later by motion search.
static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                              int ref_idx, int ref_mv_idx,
                              int skip_repeated_ref_mv,
                              const MV_REFERENCE_FRAME *ref_frame,
                              const MB_MODE_INFO_EXT *mbmi_ext) {
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
  assert(is_inter_singleref_mode(single_mode));
  if (single_mode == NEWMV) {
    // Found by motion search later; mark as invalid for now.
    this_mv->as_int = INVALID_MV;
  } else if (single_mode == GLOBALMV) {
    if (skip_repeated_ref_mv &&
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
      return 0;
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  } else {
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
    // NEARESTMV reads stack entry 0; NEARMV reads entry ref_mv_idx + 1.
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
      assert(ref_mv_offset >= 0);
      if (ref_idx == 0) {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
      } else {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
      }
    } else {
      // Candidate list too short for this index: fall back to the global mv.
      if (skip_repeated_ref_mv &&
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
        return 0;
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
    }
  }
  return 1;
}
1763
1764 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1765 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type,PREDICTION_MODE best_mode)1766 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1767 const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1768 const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
1769 if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1770 // Do not skip the mode if the current block has not yet obtained a valid
1771 // inter mode.
1772 if (!is_inter_mode(best_mode)) return 0;
1773
1774 const MACROBLOCKD *xd = &x->e_mbd;
1775 // Do not skip the mode if both the top and left neighboring blocks are not
1776 // available.
1777 if (!xd->left_available || !xd->up_available) return 0;
1778 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1779 const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1780 const int ref_mv_count =
1781 AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1782
1783 if (ref_mv_count == 0) return 0;
1784 // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1785 if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1786
1787 // Count number of ref mvs populated from nearest candidates
1788 int nearest_refmv_count = 0;
1789 for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1790 if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1791 }
1792
1793 // nearest_refmv_count indicates the closeness of block motion characteristics
1794 // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1795 // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1796 // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1797 // mode since these modes work well for blocks that shares similar motion
1798 // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1799 // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1800 // mode is pruned if none of the ref mvs are populated from nearest candidate.
1801 const int prune_thresh = 1 + (ref_mv_count >= 2);
1802 if (nearest_refmv_count < prune_thresh) return 1;
1803 return 0;
1804 }
1805
// This function updates the non-NEWMV motion vectors for the current
// prediction mode
// Builds the motion vectors for the current prediction mode into
// cur_mv[0..1] (one entry per reference list). Returns 1 on success, 0 when
// the mode should be skipped (duplicate ref mv) or an mv fails the
// clamp/validity check.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // NEWMV: seed with the matching ref mv stack entry; the final mv is
      // produced by motion search later.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Clamp to the valid mv range; fails when the clamped mv is unusable.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
1835
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1836 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1837 const MB_MODE_INFO_EXT *mbmi_ext,
1838 const int (*const drl_mode_cost0)[2],
1839 int8_t ref_frame_type) {
1840 int cost = 0;
1841 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1842 for (int idx = 0; idx < 2; ++idx) {
1843 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1844 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1845 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1846 if (mbmi->ref_mv_idx == idx) return cost;
1847 }
1848 }
1849 return cost;
1850 }
1851
1852 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1853 for (int idx = 1; idx < 3; ++idx) {
1854 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1855 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1856 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1857 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1858 }
1859 }
1860 return cost;
1861 }
1862 return cost;
1863 }
1864
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1865 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1866 const MB_MODE_INFO *const mbmi,
1867 PREDICTION_MODE this_mode) {
1868 for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1869 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1870 const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1871 if (single_mode == NEWMV &&
1872 args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1873 return 0;
1874 }
1875 }
1876 return 1;
1877 }
1878
// Returns the number of ref mv candidates (DRL indices) to evaluate for the
// given mode and reference combination.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int newmv_only = (mode == NEWMV || mode == NEW_NEWMV);

  // DRL indices are only signaled when the candidate list is long enough:
  // NEAR-class modes need more than two entries, NEW-class more than one.
  if ((has_nearmv && ref_mv_count > 2) || (newmv_only && ref_mv_count > 1)) {
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
  }
  return 1;
}
1894
1895 // Checks if particular ref_mv_idx should be pruned.
static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
                                         const int qindex,
                                         const int ref_mv_idx) {
  // At aggressiveness level 3 and above, prune unconditionally.
  if (reduce_inter_modes >= 3) return 1;
  // Q-index logic based pruning is enabled only for
  // reduce_inter_modes = 2.
  assert(reduce_inter_modes == 2);
  // When reduce_inter_modes=2, pruning happens as below based on q index
  // (lower q index => more aggressive pruning).
  // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
  // For q index range between 86 and 170: prune if ref_mv_idx >= 2.
  // For q index range between 171 and 255: no pruning.
  const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
  return (ref_mv_idx >= min_prune_ref_mv_idx);
}
1910
1911 // Whether this reference motion vector can be skipped, based on initial
1912 // heuristics.
// Returns true when the candidate `ref_mv_idx` can be skipped without a full
// evaluation. Note: sets mbmi->ref_mv_idx = ref_mv_idx as a side effect
// before the cost-based checks.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Prune low-weight candidates when LAST2/LAST3 references are involved.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // At higher aggressiveness, also prune single-ref NEWMV candidates for
    // references that are not the temporally nearest ones, gated by q index.
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound NEWMV requires both single-ref motion searches to be valid.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Estimate the signaling rate alone; if its RD cost already exceeds the
  // best, the candidate cannot win (NEAREST modes are always kept).
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1965
1966 // Compute the estimated RD cost for the motion vector with simple translation.
// Estimates the RD cost for `ref_mv_idx` using SIMPLE_TRANSLATION only:
// signaling rate plus a model-based luma rate/distortion estimate (no
// transform search). Returns INT64_MAX when the candidate is invalid or
// already worse than ref_best_rd. Note: mutates mbmi in place.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the current destination buffers so the predictor can be built
  // in place.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Force the simplest compound configuration: plain average, no wedge.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Rate of the reference-frame/compound signaling plus the DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early termination when the rate alone already exceeds the best RD.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  // Re-assert the simple-translation defaults before building the predictor.
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Luma-only prediction followed by a curve-fit model rate/dist estimate.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
2040
2041 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2042 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2043 // it is included.
// Inserts `index` into the integer-as-bitset pointed to by `mask`.
static inline void mask_set_bit(int *mask, int index) {
  *mask |= (1 << index);
}
2045
// Tests whether `index` is a member of the bitset `mask`.
static inline bool mask_check_bit(int mask, int index) {
  return (mask >> index) & 1;
}
2049
2050 // Before performing the full MV search in handle_inter_mode, do a simple
2051 // translation search and see if we can eliminate any motion vectors.
2052 // Returns an integer where, if the i-th bit is set, it means that the i-th
2053 // motion vector should be searched. This is only set for NEAR_MV.
// Returns a bit mask of the ref mv indices worth a full evaluation for the
// current mode: bit i set means index i should be searched. Combines the
// early-breakout heuristics with (for NEARMV-class modes on large blocks) a
// simple-translation RD estimate used to drop clearly inferior indices.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // With a single candidate, evaluating it directly is cheaper than running
  // the pruning logic.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block
  // size is large enough. If these conditions are not met, return all good
  // indices found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Tighter bound (1.001) for single reference, looser (1.05) for compound.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2124
2125 /*!\brief Motion mode information for inter mode search speedup.
2126 *
2127 * Used in a speed feature to search motion modes other than
2128 * SIMPLE_TRANSLATION only on winning candidates.
2129 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate, used to rank winner candidates.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2154
2155 /*!\cond */
// Holds the list of winning SIMPLE_TRANSLATION candidates on which the other
// motion modes are later evaluated (speed feature).
typedef struct motion_mode_best_st_candidate {
  // Winner candidates.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2160
2161 // Checks if the current reference frame matches with neighbouring block's
2162 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2163 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2164 MB_MODE_INFO *nb_mbmi) {
2165 MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2166 nb_mbmi->ref_frame[1] };
2167 MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2168 cur_mbmi->ref_frame[1] };
2169 const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2170 int match_found = 0;
2171
2172 for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2173 if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2174 (cur_ref_frames[i] == nb_ref_frames[1]))
2175 match_found = 1;
2176 }
2177 return match_found;
2178 }
2179
// Returns 1 when any inter-coded block in the mi row directly above the
// current block shares a reference frame with it. When the above row is
// unavailable, returns 1 (treated as a match).
static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
                                                  MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk across the above row one neighboring block at a time, stepping by
  // each neighbor's width in mi units.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2201
// Returns 1 when any inter-coded block in the mi column directly left of the
// current block shares a reference frame with it. When the left column is
// unavailable, returns 1 (treated as a match).
static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
                                                 MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk down the left column one neighboring block at a time, stepping by
  // each neighbor's height in mi units.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2223 /*!\endcond */
2224
2225 /*! \brief Struct used to hold TPL data to
2226 * narrow down parts of the inter mode search.
2227 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, accumulated over the TPL units
   * covering the block. Indexed by (ref_frame - 1); a value of 0 indicates
   * an invalid reference or missing stats.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2238
2239 #if !CONFIG_REALTIME_ONLY
2240 // TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates TPL inter costs for the block at (mi_row, mi_col) into
// inter_cost_info_from_tpl: per-reference pred_error sums over the TPL units
// covering the block, plus the minimum (best) cost among valid references.
// No-op if TPL stats are not ready for the current GF frame.
// TODO(Remya): Check if get_tpl_stats_b() can be reused
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column positions are converted to the superres (unscaled) domain since
  // TPL stats are stored at the superres-upscaled resolution.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2294 #endif
2295
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2296 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2297 PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2298 const PREDICTION_MODE this_mode, int prune_mode_level) {
2299 const int have_newmv = have_newmv_in_inter_mode(this_mode);
2300 if ((prune_mode_level < 2) && have_newmv) return 0;
2301
2302 const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2303 if (best_inter_cost == INT64_MAX) return 0;
2304
2305 const int prune_level = prune_mode_level - 1;
2306 int64_t cur_inter_cost;
2307
2308 const int is_globalmv =
2309 (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2310 const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2311
2312 // Thresholds used for pruning:
2313 // Lower value indicates aggressive pruning and higher value indicates
2314 // conservative pruning which is set based on ref_mv_idx and speed feature.
2315 // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2316 // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2317 static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2318 { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2319 };
2320
2321 const int is_comp_pred = (refs[1] > INTRA_FRAME);
2322 if (!is_comp_pred) {
2323 cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2324 } else {
2325 const int64_t inter_cost_ref0 =
2326 inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2327 const int64_t inter_cost_ref1 =
2328 inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2329 // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2330 // more aggressive pruning
2331 cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2332 }
2333
2334 // Prune the mode if cur_inter_cost is greater than threshold times
2335 // best_inter_cost
2336 if (cur_inter_cost >
2337 ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2338 best_inter_cost) >>
2339 2))
2340 return 1;
2341 return 0;
2342 }
2343
2344 /*!\brief High level function to select parameters for compound mode.
2345 *
2346 * \ingroup inter_mode_search
2347 * The main search functionality is done in the call to av1_compound_type_rd().
2348 *
2349 * \param[in] cpi Top-level encoder structure.
2350 * \param[in] x Pointer to struct holding all the data for
2351 * the current macroblock.
2352 * \param[in] args HandleInterModeArgs struct holding
2353 * miscellaneous arguments for inter mode
2354 * search. See the documentation for this
2355 * struct for a description of each member.
2356 * \param[in] ref_best_rd Best RD found so far for this block.
2357 * It is used for early termination of this
2358 * search if the RD exceeds this value.
2359 * \param[in,out] cur_mv Current motion vector.
2360 * \param[in] bsize Current block size.
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
 *                                          compound mode.
2363 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2364 * allocated buffers for the compound
2365 * predictors and masks in the compound type
2366 * search.
2367 * \param[in,out] orig_dst A prediction buffer to hold a computed
2368 * prediction. This will eventually hold the
2369 * final prediction, and the tmp_dst info will
2370 * be copied here.
2371 * \param[in] tmp_dst A temporary prediction buffer to hold a
2372 * computed prediction.
2373 * \param[in,out] rate_mv The rate associated with the motion vectors.
2374 * This will be modified if a motion search is
2375 * done in the motion mode search.
2376 * \param[in,out] rd_stats Struct to keep track of the overall RD
2377 * information.
2378 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2379 * best total RD for a skip mode so far, and
2380 * skip_rd[1] is the best RD for a skip mode so
2381 * far in luma. This is used as a speed feature
2382 * to skip the transform search if the computed
2383 * skip RD for the current mode is not better
2384 * than the best skip_rd so far.
2385 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2386 * predictor. If this is 0, the inter predictor
2387 * has already been built and thus we can avoid
2388 * repeating computation.
2389 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2390 * a viable candidate.
2391 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Masked compound modes are allowed only if both the block size and the
  // sequence header permit them.
  const int use_masked_compound = is_any_masked_compound_used(bsize) &&
                                  cm->seq_params->enable_masked_compound;
  // All four interinter compound types are candidates in the search below.
  const int comp_mode_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                             (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Derive the early-termination RD threshold from the best RD found so far.
  const int64_t prune_thresh = get_rd_thresh_from_best_rd(
      ref_best_rd, (1 << COMP_TYPE_RD_THRESH_SHIFT), COMP_TYPE_RD_THRESH_SCALE);

  int luma_interp_done = 0;
  int64_t compound_best_rd;
  // Select the compound type (and any type-specific parameters such as the
  // mask) and compute the resulting RD.
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, comp_mode_mask, use_masked_compound,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &compound_best_rd, rd_stats,
      ref_best_rd, skip_rd[1], &luma_interp_done, prune_thresh);

  // Terminate early when the compound RD is already clearly worse than the
  // best RD seen so far.
  if (ref_best_rd < INT64_MAX &&
      (compound_best_rd >> COMP_TYPE_RD_THRESH_SHIFT) *
              COMP_TYPE_RD_THRESH_SCALE >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // For COMPOUND_AVERAGE the luma predictor already resides in the dst
  // buffer: it is the first candidate tried in av1_compound_type_rd, so it
  // used dst_buf rather than tmp_buf. Only the chroma planes need building.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2446
2447 // Speed feature to prune out MVs that are similar to previous MVs if they
2448 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2449 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2450 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2451 MB_MODE_INFO *mbmi, int pruning_factor) {
2452 int i;
2453 const int is_comp_pred = has_second_ref(mbmi);
2454 const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2455
2456 // Skip the evaluation if an MV match is found.
2457 if (ref_mv_idx > 0) {
2458 for (int idx = 0; idx < ref_mv_idx; ++idx) {
2459 if (save_mv[idx][0].as_int == INVALID_MV) continue;
2460
2461 int mv_diff = 0;
2462 for (i = 0; i < 1 + is_comp_pred; ++i) {
2463 mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2464 abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2465 }
2466
2467 // If this mode is not the best one, and current MV is similar to
2468 // previous stored MV, terminate this ref_mv_idx evaluation.
2469 if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2470 }
2471 }
2472
2473 if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2474 for (i = 0; i < is_comp_pred + 1; ++i)
2475 save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2476 }
2477
2478 return 0;
2479 }
2480
2481 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2482 *
2483 * \ingroup inter_mode_search
2484 *
2485 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2486 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2487 * Else returns 0.
2488 *
 * Note that the SSE here comes from single_motion_search, so it is
 * interpolated with the filter used in motion search, not the actual
 * interpolation filter used in encoding.
2492 *
2493 * \param[in] fn_ptr A table of function pointers to compute SSE.
2494 * \param[in] x Pointer to struct holding all the data for
2495 * the current macroblock.
2496 * \param[in] bsize The current block_size.
2497 * \param[in] args The args to handle_inter_mode, used to track
2498 * the best SSE.
2499 * \param[in] prune_zero_mv_with_sse The argument holds speed feature
2500 * prune_zero_mv_with_sse value
2501 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2502 */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args,int prune_zero_mv_with_sse)2503 static AOM_INLINE int prune_zero_mv_with_sse(
2504 const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2505 const HandleInterModeArgs *args, int prune_zero_mv_with_sse) {
2506 const MACROBLOCKD *xd = &x->e_mbd;
2507 const MB_MODE_INFO *mbmi = xd->mi[0];
2508
2509 const int is_comp_pred = has_second_ref(mbmi);
2510 const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2511
2512 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2513 if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2514 // Pruning logic only works for IDENTITY type models
2515 // Note: In theory we could apply similar logic for TRANSLATION
2516 // type models, but we do not code these due to a spec bug
2517 // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2518 assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2519 return 0;
2520 }
2521
2522 // Don't prune if we have invalid data
2523 assert(mbmi->mv[idx].as_int == 0);
2524 if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2525 return 0;
2526 }
2527 }
2528
2529 // Sum up the sse of ZEROMV and best NEWMV
2530 unsigned int this_sse_sum = 0;
2531 unsigned int best_sse_sum = 0;
2532 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2533 const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2534 const struct macroblockd_plane *pd = xd->plane;
2535 const struct buf_2d *src_buf = &p->src;
2536 const struct buf_2d *ref_buf = &pd->pre[idx];
2537 const uint8_t *src = src_buf->buf;
2538 const uint8_t *ref = ref_buf->buf;
2539 const int src_stride = src_buf->stride;
2540 const int ref_stride = ref_buf->stride;
2541
2542 unsigned int this_sse;
2543 fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2544 this_sse_sum += this_sse;
2545
2546 const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2547 best_sse_sum += best_sse;
2548 }
2549
2550 const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2551 if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2552 return 1;
2553 }
2554
2555 return 0;
2556 }
2557
2558 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2559 *
2560 * \ingroup inter_mode_search
2561 *
2562 * Does a simple interpolation filter search during winner mode evaluation. This
2563 * is currently only used by realtime mode as \ref
2564 * av1_interpolation_filter_search is not called during realtime encoding.
2565 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
 *
2570 * \param[in] cpi Pointer to the compressor. Used for feature
2571 * flags.
2572 * \param[in,out] x Pointer to macroblock. This is primarily
2573 * used to access the buffers.
2574 * \param[in] mi_row The current row in mi unit (4X4 pixels).
2575 * \param[in] mi_col The current col in mi unit (4X4 pixels).
2576 * \param[in] bsize The current block_size.
2577 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2578 */
static AOM_INLINE bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          int mi_row, int mi_col,
                                          BLOCK_SIZE bsize) {
  // Candidate (x, y) filter pairs. Depending on resolution either the SMOOTH
  // or the SHARP entry is skipped below, so at most two candidates are
  // actually evaluated.
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best one.
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // No filter search is needed when interpolation is not applied for this
  // block; report that no predictor was built.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Skip SMOOTH for low-res clips and SHARP for higher-res clips.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this filter and model its RD cost.
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    // Add the rate of signalling the switchable filter choice.
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Swap so the just-built (now best) predictor is parked in dst_bufs[1]
      // and the next candidate is written into dst_bufs[0].
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // Best predictor already resides in the original dst; swap so xd->plane
    // points back at it.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one, and the best predictor must be copied out of tmp_dst.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the chroma (U, V) predictors with the chosen filter.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2689
2690 /*!\brief AV1 inter mode RD computation
2691 *
2692 * \ingroup inter_mode_search
2693 * Do the RD search for a given inter mode and compute all information relevant
2694 * to the input mode. It will compute the best MV,
2695 * compound parameters (if the mode is a compound mode) and interpolation filter
2696 * parameters.
2697 *
2698 * \param[in] cpi Top-level encoder structure.
2699 * \param[in] tile_data Pointer to struct holding adaptive
2700 * data/contexts/models for the tile during
2701 * encoding.
2702 * \param[in] x Pointer to structure holding all the data
2703 * for the current macroblock.
2704 * \param[in] bsize Current block size.
2705 * \param[in,out] rd_stats Struct to keep track of the overall RD
2706 * information.
2707 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2708 * for only the Y plane.
2709 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2710 * for only the UV planes.
2711 * \param[in] args HandleInterModeArgs struct holding
2712 * miscellaneous arguments for inter mode
2713 * search. See the documentation for this
2714 * struct for a description of each member.
2715 * \param[in] ref_best_rd Best RD found so far for this block.
2716 * It is used for early termination of this
2717 * search if the RD exceeds this value.
2718 * \param[in] tmp_buf Temporary buffer used to hold predictors
2719 * built in this search.
2720 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2721 * allocated buffers for the compound
2722 * predictors and masks in the compound type
2723 * search.
2724 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2725 * do_tx_search (see below) is 0.
2726 * \param[in] do_tx_search Parameter to indicate whether or not to do
2727 * a full transform search. This will compute
2728 * an estimated RD for the modes without the
2729 * transform search and later perform the full
2730 * transform search on the best candidates.
2731 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2732 * information to perform a full transform
2733 * search only on winning candidates searched
2734 * with an estimate for transform coding RD.
2735 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2736 * motion mode information used in a speed
2737 * feature to search motion modes other than
2738 * SIMPLE_TRANSLATION only on winning
2739 * candidates.
2740 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2741 * best total RD for a skip mode so far, and
2742 * skip_rd[1] is the best RD for a skip mode so
2743 * far in luma. This is used as a speed feature
2744 * to skip the transform search if the computed
2745 * skip RD for the current mode is not better
2746 * than the best skip_rd so far.
2747 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2748 * narrow down the search based on data
2749 * collected in the TPL model.
2750 * \param[out] yrd Stores the rdcost corresponding to encoding
2751 * the luma plane.
2752 *
2753 * \return The RD cost for the mode being searched.
2754 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else  // CONFIG_REALTIME_ONLY
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // TPL-based pruning is only usable when TPL stats are available for the
  // current frame.
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode. An absent (negative) second reference is
  // mapped to 0 so refs[1] can be used as an array index.
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Running best over all ref_mv_idx iterations of the main loop below.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv indices selected for full search by the simple
  // translation pre-search.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate cost common to every ref_mv_idx: reference-frame signalling,
  // single/compound signalling and the mode itself.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        // Track the best single-reference NEWMV SSE per reference frame;
        // consumed later by prune_zero_mv_with_sse.
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // SSE ratio thresholds indexed by [aggressiveness][block size].
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early exit when the rate alone (distortion 0) already exceeds the best
    // RD; the NEAREST modes are exempt and always evaluated.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the modelled RD for single-reference modes so that compound
      // modes built from them can be pruned below.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          // Prune the compound mode when its modelled RD is much worse than
          // the better of its two constituent single-reference modes.
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the bound for subsequent iterations and remember which index
      // achieved it (consumed by prune_ref_mv_idx_search).
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    // Reset xd->plane's dst buffers before the next iteration.
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3105
3106 /*!\brief Search for the best intrabc predictor
3107 *
3108 * \ingroup intra_mode_search
3109 * \callergraph
3110 * This function performs a motion search to find the best intrabc predictor.
3111 *
3112 * \returns Returns the best overall rdcost (including the non-intrabc modes
3113 * search before this function).
3114 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // Intrabc is only searched when allowed by the sequence/frame headers and
  // not pruned away by speed features.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  // Derive the reference DV from the intra-frame MV stack.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    // No usable candidate in the stack; fall back to a default DV.
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // Intrabc predicts from the already-coded part of the current frame, so
  // point the prediction buffers at the current frame itself.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // Search the area above the current superblock row, then the area to the
  // left within the current superblock row.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }

    // Bug fix: the asserts here used to compare each limit against itself
    // (e.g. col_min >= col_min), which is vacuously true. Snapshot the
    // direction-specific limits so we can verify below that
    // av1_set_mv_search_range() only ever tightens them.
    const FullMvLimits tmp_mv_limits = fullms_params.mv_limits;

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    assert(fullms_params.mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= tmp_mv_limits.row_max);

    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      // Empty search range for this direction.
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;
    FULLPEL_MV_STATS best_mv_stats;

    // Regular full-pel search, then hash-based search; keep the cheaper DV.
    int bestsme =
        av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                              &best_mv.as_fullmv, &best_mv_stats, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure the block as an intrabc block and build its prediction.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner (which may be the non-intrabc input mode).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3306
3307 // TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3308 // typedef here because Doxygen doesn't know about the typedefs yet. So using
3309 // the typedef will prevent doxygen from finding this function and generating
3310 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3311 // doxygen, we can revert back to using the typedefs.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int luma_rate = 0, chroma_rate = 0;
  int luma_rate_tokenonly = 0, chroma_rate_tokenonly = 0;
  uint8_t luma_skip_txfm = 0, chroma_skip_txfm = 0;
  int64_t luma_dist = 0, chroma_dist = 0;

  // Configure the block as a plain intra block before searching.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Search the luma intra modes first.
  const int64_t intra_yrd = av1_rd_pick_intra_sby_mode(
      cpi, x, &luma_rate, &luma_rate_tokenonly, &luma_dist, &luma_skip_txfm,
      bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd >= best_rd) {
    // Luma search could not beat the current best; mark the cost invalid.
    rd_cost->rate = INT_MAX;
  } else {
    // Search intra modes for the chroma planes if present.
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &chroma_rate, &chroma_rate_tokenonly,
                                  &chroma_dist, &chroma_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        luma_rate + chroma_rate +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = luma_dist + chroma_dist;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  }

  // Tighten the RD bound before trying intrabc.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd) {
    best_rd = rd_cost->rdcost;
  }
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    // Intrabc won; capture its txfm skip information.
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info in the pick-mode context.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3380
// Forward declaration; the definition appears later in this file.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride);
3385
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3386 static AOM_INLINE void rd_pick_skip_mode(
3387 RD_STATS *rd_cost, InterModeSearchState *search_state,
3388 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3389 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3390 const AV1_COMMON *const cm = &cpi->common;
3391 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3392 const int num_planes = av1_num_planes(cm);
3393 MACROBLOCKD *const xd = &x->e_mbd;
3394 MB_MODE_INFO *const mbmi = xd->mi[0];
3395
3396 x->compound_idx = 1; // COMPOUND_AVERAGE
3397 RD_STATS skip_mode_rd_stats;
3398 av1_invalid_rd_stats(&skip_mode_rd_stats);
3399
3400 if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3401 skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3402 return;
3403 }
3404
3405 const MV_REFERENCE_FRAME ref_frame =
3406 LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3407 const MV_REFERENCE_FRAME second_ref_frame =
3408 LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3409 const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3410 const THR_MODES mode_index =
3411 get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3412
3413 if (mode_index == THR_INVALID) {
3414 return;
3415 }
3416
3417 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3418 cpi->sf.inter_sf.disable_onesided_comp) &&
3419 cpi->all_one_sided_refs) {
3420 return;
3421 }
3422
3423 mbmi->mode = this_mode;
3424 mbmi->uv_mode = UV_DC_PRED;
3425 mbmi->ref_frame[0] = ref_frame;
3426 mbmi->ref_frame[1] = second_ref_frame;
3427 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3428 if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3429 MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3430 if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3431 mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3432 return;
3433 }
3434 av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3435 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3436 mbmi_ext->mode_context);
3437 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3438 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3439 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3440 }
3441
3442 assert(this_mode == NEAREST_NEARESTMV);
3443 if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3444 return;
3445 }
3446
3447 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3448 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3449 mbmi->comp_group_idx = 0;
3450 mbmi->compound_idx = x->compound_idx;
3451 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3452 mbmi->motion_mode = SIMPLE_TRANSLATION;
3453 mbmi->ref_mv_idx = 0;
3454 mbmi->skip_mode = mbmi->skip_txfm = 1;
3455 mbmi->palette_mode_info.palette_size[0] = 0;
3456 mbmi->palette_mode_info.palette_size[1] = 0;
3457
3458 set_default_interp_filters(mbmi, cm->features.interp_filter);
3459
3460 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3461 for (int i = 0; i < num_planes; i++) {
3462 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3463 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3464 }
3465
3466 BUFFER_SET orig_dst;
3467 for (int i = 0; i < num_planes; i++) {
3468 orig_dst.plane[i] = xd->plane[i].dst.buf;
3469 orig_dst.stride[i] = xd->plane[i].dst.stride;
3470 }
3471
3472 // Compare the use of skip_mode with the best intra/inter mode obtained.
3473 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3474 int64_t best_intra_inter_mode_cost = INT64_MAX;
3475 if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3476 const ModeCosts *mode_costs = &x->mode_costs;
3477 best_intra_inter_mode_cost = RDCOST(
3478 x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3479 rd_cost->dist);
3480 // Account for non-skip mode rate in total rd stats
3481 rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3482 av1_rd_cost_update(x->rdmult, rd_cost);
3483 }
3484
3485 // Obtain the rdcost for skip_mode.
3486 skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3487 best_intra_inter_mode_cost);
3488
3489 if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3490 (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3491 assert(mode_index != THR_INVALID);
3492 search_state->best_mbmode.skip_mode = 1;
3493 search_state->best_mbmode = *mbmi;
3494 memset(search_state->best_mbmode.inter_tx_size,
3495 search_state->best_mbmode.tx_size,
3496 sizeof(search_state->best_mbmode.inter_tx_size));
3497 set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3498 search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3499 xd);
3500 search_state->best_mode_index = mode_index;
3501
3502 // Update rd_cost
3503 rd_cost->rate = skip_mode_rd_stats.rate;
3504 rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3505 rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3506
3507 search_state->best_rd = rd_cost->rdcost;
3508 search_state->best_skip2 = 1;
3509 search_state->best_mode_skippable = 1;
3510
3511 x->txfm_search_info.skip_txfm = 1;
3512 }
3513 }
3514
3515 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3516 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3517 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3518 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3519 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3520 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3521 int mode_idx) {
3522 MB_MODE_INFO *winner_mbmi;
3523 if (multi_winner_mode_type) {
3524 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3525 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3526 winner_mbmi = &winner_mode_stat->mbmi;
3527
3528 *winner_rd_cost = &winner_mode_stat->rd_cost;
3529 *winner_rate_y = winner_mode_stat->rate_y;
3530 *winner_rate_uv = winner_mode_stat->rate_uv;
3531 *winner_mode_index = winner_mode_stat->mode_index;
3532 } else {
3533 winner_mbmi = best_mbmode;
3534 *winner_rd_cost = best_rd_cost;
3535 *winner_rate_y = best_rate_y;
3536 *winner_rate_uv = best_rate_uv;
3537 *winner_mode_index = *best_mode_index;
3538 }
3539 return winner_mbmi;
3540 }
3541
3542 // speed feature: fast intra/inter transform type search
3543 // Used for speed >= 2
3544 // When this speed feature is on, in rd mode search, only DCT is used.
3545 // After the mode is determined, this function is called, to select
3546 // transform types and get accurate rdcost.
// Re-evaluates the winner mode(s) with a full transform type/size search to
// obtain an accurate rdcost (the main search may have restricted itself to
// DCT only), and updates 'rd_cost', 'best_mbmode', 'ctx' etc. when the
// refined result is better.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  // Nothing to do when winner-mode processing is disabled for this block.
  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  // Refine each candidate in the winner-mode list (or just the single best
  // mode when the list is disabled).
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Install the candidate as the current mode for re-evaluation.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Full tx search: recursive partitioning when TX_MODE_SELECT is on,
        // otherwise a uniform tx size for the whole block.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // Decide between coding the residual and skipping it entirely, based
      // on which gives the lower rdcost (inter modes only).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's original luma/chroma rates with the refined
      // ones to obtain the updated total rate.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // The refined candidate is the new overall best.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3686
3687 /*!\cond */
// Per-block pruning mask for the inter mode search: records which prediction
// modes and which reference-frame combinations should NOT be evaluated.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
3698 /*!\endcond */
3699
3700 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3701 static AOM_INLINE void disable_reference(
3702 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3703 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3704 ref_combo[ref][ref2 + 1] = true;
3705 }
3706 }
3707
3708 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3709 static AOM_INLINE void disable_inter_references_except_altref(
3710 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3711 disable_reference(LAST_FRAME, ref_combo);
3712 disable_reference(LAST2_FRAME, ref_combo);
3713 disable_reference(LAST3_FRAME, ref_combo);
3714 disable_reference(GOLDEN_FRAME, ref_combo);
3715 disable_reference(BWDREF_FRAME, ref_combo);
3716 disable_reference(ALTREF2_FRAME, ref_combo);
3717 }
3718
// (first ref, second ref) pairs evaluated when the reduced reference set is
// in use; NONE_FRAME as the second entry denotes a single-reference mode.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Which collection of reference-frame combinations the mode search may use.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3731
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3732 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3733 REF_SET ref_set) {
3734 if (ref_set == REF_SET_FULL) {
3735 // Everything available by default.
3736 memset(mask, 0, sizeof(*mask));
3737 } else {
3738 // All modes available by default.
3739 memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3740 // All references disabled first.
3741 for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3742 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3743 mask->ref_combo[ref1][ref2 + 1] = true;
3744 }
3745 }
3746 const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3747 int num_ref_combos;
3748
3749 // Then enable reduced set of references explicitly.
3750 switch (ref_set) {
3751 case REF_SET_REDUCED:
3752 ref_set_combos = reduced_ref_combos;
3753 num_ref_combos =
3754 (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3755 break;
3756 case REF_SET_REALTIME:
3757 ref_set_combos = real_time_ref_combos;
3758 num_ref_combos =
3759 (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3760 break;
3761 default: assert(0); num_ref_combos = 0;
3762 }
3763
3764 for (int i = 0; i < num_ref_combos; ++i) {
3765 const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3766 mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3767 }
3768 }
3769 }
3770
init_mode_skip_mask(mode_skip_mask_t * mask,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize)3771 static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
3772 const AV1_COMP *cpi, MACROBLOCK *x,
3773 BLOCK_SIZE bsize) {
3774 const AV1_COMMON *const cm = &cpi->common;
3775 const struct segmentation *const seg = &cm->seg;
3776 MACROBLOCKD *const xd = &x->e_mbd;
3777 MB_MODE_INFO *const mbmi = xd->mi[0];
3778 unsigned char segment_id = mbmi->segment_id;
3779 const SPEED_FEATURES *const sf = &cpi->sf;
3780 const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3781 REF_SET ref_set = REF_SET_FULL;
3782
3783 if (sf->rt_sf.use_real_time_ref_set)
3784 ref_set = REF_SET_REALTIME;
3785 else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3786 ref_set = REF_SET_REDUCED;
3787
3788 default_skip_mask(mask, ref_set);
3789
3790 int min_pred_mv_sad = INT_MAX;
3791 MV_REFERENCE_FRAME ref_frame;
3792 if (ref_set == REF_SET_REALTIME) {
3793 // For real-time encoding, we only look at a subset of ref frames. So the
3794 // threshold for pruning should be computed from this subset as well.
3795 const int num_rt_refs =
3796 sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3797 for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3798 const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3799 if (ref != INTRA_FRAME) {
3800 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3801 }
3802 }
3803 } else {
3804 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3805 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3806 }
3807
3808 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3809 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3810 // Skip checking missing reference in both single and compound reference
3811 // modes.
3812 disable_reference(ref_frame, mask->ref_combo);
3813 } else {
3814 // Skip fixed mv modes for poor references
3815 if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3816 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3817 }
3818 }
3819 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3820 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3821 // Reference not used for the segment.
3822 disable_reference(ref_frame, mask->ref_combo);
3823 }
3824 }
3825 // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3826 // is disabled for this segment. This is to prevent the possibility that we
3827 // end up unable to pick any mode.
3828 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3829 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3830 // unless ARNR filtering is enabled in which case we want
3831 // an unfiltered alternative. We allow near/nearest as well
3832 // because they may result in zero-zero MVs but be cheaper.
3833 if (cpi->rc.is_src_frame_alt_ref &&
3834 (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3835 disable_inter_references_except_altref(mask->ref_combo);
3836
3837 mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3838 const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3839 int_mv near_mv, nearest_mv, global_mv;
3840 get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3841 &x->mbmi_ext);
3842 get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3843 get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3844
3845 if (near_mv.as_int != global_mv.as_int)
3846 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3847 if (nearest_mv.as_int != global_mv.as_int)
3848 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3849 }
3850 }
3851
3852 if (cpi->rc.is_src_frame_alt_ref) {
3853 if (inter_sf->alt_ref_search_fp &&
3854 (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3855 mask->pred_modes[ALTREF_FRAME] = 0;
3856 disable_inter_references_except_altref(mask->ref_combo);
3857 disable_reference(INTRA_FRAME, mask->ref_combo);
3858 }
3859 }
3860
3861 if (inter_sf->alt_ref_search_fp) {
3862 if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3863 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
3864 // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3865 // those are past frames
3866 MV_REFERENCE_FRAME start_frame =
3867 inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3868 for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3869 if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3870 0) {
3871 // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3872 // to the relative dist of LAST_FRAME.
3873 if (inter_sf->alt_ref_search_fp == 1 &&
3874 (abs(cpi->ref_frame_dist_info
3875 .ref_relative_dist[ref_frame - LAST_FRAME]) >
3876 1.5 * abs(cpi->ref_frame_dist_info
3877 .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3878 continue;
3879 }
3880 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3881 mask->pred_modes[ref_frame] |= INTER_ALL;
3882 }
3883 }
3884 }
3885 }
3886
3887 if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3888 if (x->best_pred_mv_sad[0] < INT_MAX) {
3889 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
3890 const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3891
3892 // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3893 for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3894 ref_frame = prune_ref_list[ref_idx];
3895 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3896 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3897 }
3898 }
3899 }
3900
3901 if (bsize > sf->part_sf.max_intra_bsize) {
3902 disable_reference(INTRA_FRAME, mask->ref_combo);
3903 }
3904
3905 if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3906 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3907 mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3908 mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3909 }
3910 }
3911
3912 mask->pred_modes[INTRA_FRAME] |=
3913 ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3914
3915 // Prune reference frames which are not the closest to the current
3916 // frame and with large pred_mv_sad.
3917 if (inter_sf->prune_single_ref) {
3918 assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3919 const double prune_threshes[2] = { 1.20, 1.05 };
3920
3921 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3922 const RefFrameDistanceInfo *const ref_frame_dist_info =
3923 &cpi->ref_frame_dist_info;
3924 const int is_closest_ref =
3925 (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3926 (ref_frame == ref_frame_dist_info->nearest_future_ref);
3927
3928 if (!is_closest_ref) {
3929 const int dir =
3930 (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3931 ? 0
3932 : 1;
3933 if (x->best_pred_mv_sad[dir] < INT_MAX &&
3934 x->pred_mv_sad[ref_frame] >
3935 prune_threshes[inter_sf->prune_single_ref - 1] *
3936 x->best_pred_mv_sad[dir])
3937 mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3938 }
3939 }
3940 }
3941 }
3942
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3943 static AOM_INLINE void init_neighbor_pred_buf(
3944 const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3945 int is_hbd) {
3946 if (is_hbd) {
3947 const int len = sizeof(uint16_t);
3948 args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3949 args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3950 (MAX_SB_SQUARE >> 1) * len);
3951 args->above_pred_buf[2] =
3952 CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3953 args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3954 args->left_pred_buf[1] =
3955 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3956 args->left_pred_buf[2] =
3957 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3958 } else {
3959 args->above_pred_buf[0] = obmc_buffer->above_pred;
3960 args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3961 args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3962 args->left_pred_buf[0] = obmc_buffer->left_pred;
3963 args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3964 args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3965 }
3966 }
3967
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3968 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3969 MV_REFERENCE_FRAME ref_frame) {
3970 const AV1_COMMON *const cm = &cpi->common;
3971 MV_REFERENCE_FRAME rf[2];
3972 av1_set_ref_frame(rf, ref_frame);
3973
3974 if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3975
3976 if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3977 cm->cur_frame->ref_display_order_hint)) {
3978 return 1;
3979 }
3980
3981 return 0;
3982 }
3983
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3984 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3985 int ref_frame, int skip_ref_frame_mask) {
3986 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3987 if (!(skip_ref_frame_mask & (1 << r))) {
3988 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3989 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3990 return 1;
3991 }
3992 }
3993 }
3994 return 0;
3995 }
3996
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3997 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3998 const MB_MODE_INFO *mi_cache) {
3999 if (!mi_cache) {
4000 return 0;
4001 }
4002
4003 if (ref_frame < REF_FRAMES) {
4004 return (ref_frame == mi_cache->ref_frame[0] ||
4005 ref_frame == mi_cache->ref_frame[1]);
4006 }
4007
4008 // if we are here, then the current mode is compound.
4009 MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4010 return ref_frame == cached_ref_type;
4011 }
4012
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// Performs all per-block setup required before the inter-mode RD search:
// - points the OBMC neighbor-prediction buffers into x->obmc_buffer,
// - estimates reference-frame signaling costs,
// - populates reference buffers / mv candidates and pred_mv_sad per ref,
// - folds the best pred_mv_sad for past ([0]) and future ([1]) references,
// - optionally builds OBMC above/left predictions,
// - initializes the mode skip mask and mode-evaluation parameters.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Best pred_mv_sad over past ([0]) and future ([1]) references; folded in
  // the single-reference loop below.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      // Fills yv12_mb, the ref-mv candidates and x->pred_mv_sad[ref_frame],
      // which the SAD fold just below reads.
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent single references must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  // prune_obmc is assigned in exactly one of the two branches below:
  // use_actual_frame_probs is 1 unless the FPMT simulation test overrides it.
  int prune_obmc;
#if CONFIG_FPMT_TEST
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Build the above/left neighbor predictions used by OBMC; the per-plane
      // width/height pairs bound each prediction strip.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      // Restore dst planes clobbered by the neighbor-prediction builds.
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
4148
init_single_inter_mode_search_state(InterModeSearchState * search_state)4149 static AOM_INLINE void init_single_inter_mode_search_state(
4150 InterModeSearchState *search_state) {
4151 for (int dir = 0; dir < 2; ++dir) {
4152 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4153 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4154 SingleInterModeState *state;
4155
4156 state = &search_state->single_state[dir][mode][ref_frame];
4157 state->ref_frame = NONE_FRAME;
4158 state->rd = INT64_MAX;
4159
4160 state = &search_state->single_state_modelled[dir][mode][ref_frame];
4161 state->ref_frame = NONE_FRAME;
4162 state->rd = INT64_MAX;
4163
4164 search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4165 }
4166 }
4167 }
4168
4169 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4170 search_state->best_single_rd[ref_frame] = INT64_MAX;
4171 search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4172 }
4173 av1_zero(search_state->single_state_cnt);
4174 av1_zero(search_state->single_state_modelled_cnt);
4175 }
4176
// Initializes the inter-mode search state for one block: invalid best-so-far
// results, per-mode rd thresholds (scaled by thresh_freq_fact), and the
// simple/modelled rd tables. Compound-mode state is only initialized when the
// frame allows compound references.
static AOM_INLINE void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV modes (indices up to LAST_NEW_MV_INDEX) are never thresholded.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  // Remaining single-reference modes get adaptively scaled thresholds.
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  // Invalidate the single-reference rd tables.
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode thresholds and rd tables are only needed when the frame
  // may use more than a single reference.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
4254
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4255 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4256 const MV_REFERENCE_FRAME *ref_frame,
4257 const PREDICTION_MODE this_mode) {
4258 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4259 return true;
4260 }
4261
4262 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4263 }
4264
// Returns 1 if (curr_mode, ref_frames) is an illegal or unavailable
// configuration for this block and must be skipped, 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  // Compound prediction: reject when two references are not legal here.
  if (ref_frames[1] > INTRA_FRAME) {
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;

    const AV1_COMMON *const cm = &cpi->common;
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the inter mode must support it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME &&
      (!is_interintra_allowed_bsize(bsize) ||
       !is_interintra_allowed_mode(curr_mode))) {
    return 1;
  }

  return 0;
}
4297
// Unions the per-mi-unit picked-reference masks over every mi unit the
// current block covers inside its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_mask = mib_size - 1;
  // Block position relative to the superblock origin, in mi units.
  const int row0 = xd->mi_row & sb_mask;
  const int col0 = xd->mi_col & sb_mask;
  const int rows = mi_size_high[bsize];
  const int cols = mi_size_wide[bsize];
  int mask = 0;
  for (int r = row0; r < row0 + rows; ++r) {
    for (int c = col0; c < col0 + cols; ++c) {
      // picked_ref_frames_mask is laid out as a 32-wide per-SB grid.
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4316
4317 // Check if reference frame pair of the current block matches with the given
4318 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4319 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4320 const MV_REFERENCE_FRAME *ref_frames) {
4321 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4322 (ref_frames[1] == mbmi->ref_frame[1]));
4323 }
4324
// Decides, independently of search order, whether (mode, ref_frame) should
// be skipped for this block.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 (skip fully) or 2 (keep the single motion search the compound
        // mode depends on, skip only its motion-mode refinement).
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Thresholds indexed by [speed-feature level - 1][qindex third]: how
      // many neighbors must use this ref pair to keep the mode.
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4479
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4480 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4481 const MV_REFERENCE_FRAME *ref_frames,
4482 const AV1_COMMON *cm) {
4483 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4484 mbmi->ref_mv_idx = 0;
4485 mbmi->mode = curr_mode;
4486 mbmi->uv_mode = UV_DC_PRED;
4487 mbmi->ref_frame[0] = ref_frames[0];
4488 mbmi->ref_frame[1] = ref_frames[1];
4489 pmi->palette_size[0] = 0;
4490 pmi->palette_size[1] = 0;
4491 mbmi->filter_intra_mode_info.use_filter_intra = 0;
4492 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4493 mbmi->motion_mode = SIMPLE_TRANSLATION;
4494 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4495 set_default_interp_filters(mbmi, cm->features.interp_filter);
4496 }
4497
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4498 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4499 InterModeSearchState *search_state,
4500 const MB_MODE_INFO *const mbmi) {
4501 int i, j;
4502 const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4503 const PREDICTION_MODE this_mode = mbmi->mode;
4504 const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4505 const int mode_offset = INTER_OFFSET(this_mode);
4506 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4507
4508 // Simple rd
4509 int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4510 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4511 const int64_t rd =
4512 search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4513 if (rd < simple_rd) simple_rd = rd;
4514 }
4515
4516 // Insertion sort of single_state
4517 const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4518 SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4519 i = search_state->single_state_cnt[dir][mode_offset];
4520 for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4521 state_s[j] = state_s[j - 1];
4522 state_s[j] = this_state_s;
4523 search_state->single_state_cnt[dir][mode_offset]++;
4524
4525 // Modelled rd
4526 int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4527 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4528 const int64_t rd =
4529 search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4530 if (rd < modelled_rd) modelled_rd = rd;
4531 }
4532
4533 // Insertion sort of single_state_modelled
4534 const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4535 SingleInterModeState *state_m =
4536 search_state->single_state_modelled[dir][mode_offset];
4537 i = search_state->single_state_modelled_cnt[dir][mode_offset];
4538 for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4539 state_m[j] = state_m[j - 1];
4540 state_m[j] = this_state_m;
4541 search_state->single_state_modelled_cnt[dir][mode_offset]++;
4542 }
4543
// Post-processes the collected single-reference results: first invalidates
// references whose rd is far worse than the best NEWMV/GLOBALMV rd in the
// same direction, then builds single_rd_order by merging the surviving
// entries of the simple-rd list (primary) and the modelled-rd list
// (secondary, deduplicated).
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune levels use a tighter (larger) factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // i starts at 1: the best entry of each mode is always kept.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same pruning applied to the modelled-rd table.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // Pass 1: valid entries from the simple-rd list, in rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Pass 2: fill remaining slots from the modelled-rd list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4632
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4633 static int compound_skip_get_candidates(
4634 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4635 const int dir, const PREDICTION_MODE mode) {
4636 const int mode_offset = INTER_OFFSET(mode);
4637 const SingleInterModeState *state =
4638 search_state->single_state[dir][mode_offset];
4639 const SingleInterModeState *state_modelled =
4640 search_state->single_state_modelled[dir][mode_offset];
4641
4642 int max_candidates = 0;
4643 for (int i = 0; i < FWD_REFS; ++i) {
4644 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4645 max_candidates++;
4646 }
4647
4648 int candidates = max_candidates;
4649 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4650 candidates = AOMMIN(2, max_candidates);
4651 }
4652 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4653 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4654 state[0].ref_frame == state_modelled[0].ref_frame)
4655 candidates = 1;
4656 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4657 }
4658
4659 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4660 // Limit the number of candidates to 1 in each direction for compound
4661 // prediction
4662 candidates = AOMMIN(1, candidates);
4663 }
4664 return candidates;
4665 }
4666
// Decides whether the compound mode 'this_mode' with references {ref_frame,
// second_ref_frame} can be pruned, using statistics gathered while searching
// the corresponding single-reference modes. Returns 1 to skip the mode,
// 0 to evaluate it.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each component reference: 0 for refs up to GOLDEN_FRAME
  // (forward), 1 otherwise (backward).
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component reference was visited during the
  // single-reference mode search.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV components, require that the MV the single mode used
  // matches the MV the compound mode would use for every ref-MV candidate;
  // otherwise the single-mode statistics are not representative.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune the compound mode when a component reference (whose stats are
  // usable) is absent from the top single-reference candidates of its
  // direction.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4731
4732 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4733 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4734 const MV_REFERENCE_FRAME *ref_frames,
4735 int *const is_ref_match) {
4736 if (is_inter_block(mbmi)) {
4737 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4738 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4739 if (has_second_ref(mbmi)) {
4740 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4741 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4742 }
4743 }
4744 }
4745
4746 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4747 static INLINE int compound_skip_using_neighbor_refs(
4748 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4749 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4750 // Exclude non-extended compound modes from pruning
4751 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4752 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4753 return 0;
4754
4755 if (prune_ext_comp_using_neighbors >= 3) return 1;
4756
4757 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4758 // 1 - match for backward refs
4759 // Check if ref frames of this block matches with left neighbor.
4760 if (xd->left_available)
4761 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4762
4763 // Check if ref frames of this block matches with above neighbor.
4764 if (xd->up_available)
4765 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4766
4767 // Combine ref frame match with neighbors in forward and backward refs.
4768 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4769
4770 // Pruning based on ref frame match with neighbors.
4771 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4772 return 1;
4773 }
4774
4775 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4776 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4777 const PREDICTION_MODE this_mode,
4778 const MV_REFERENCE_FRAME ref_frame,
4779 int64_t this_rd) {
4780 if (this_rd < search_state->best_single_rd[ref_frame]) {
4781 search_state->best_single_rd[ref_frame] = this_rd;
4782 search_state->best_single_mode[ref_frame] = this_mode;
4783 }
4784 }
4785
4786 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4787 static INLINE int skip_compound_using_best_single_mode_ref(
4788 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4789 const PREDICTION_MODE *best_single_mode,
4790 int prune_comp_using_best_single_mode_ref) {
4791 // Exclude non-extended compound modes from pruning
4792 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4793 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4794 return 0;
4795
4796 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4797 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4798 // Get ref frame direction corresponding to NEWMV
4799 // 0 - NEWMV corresponding to forward direction
4800 // 1 - NEWMV corresponding to backward direction
4801 const int newmv_dir = comp_mode_ref0 != NEWMV;
4802
4803 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4804 // have NEWMV as single mode winner.
4805 // Example: For an extended-compound mode,
4806 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4807 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4808 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4809 // ALTREF_FRAME is NEWMV
4810 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4811 if (single_mode == NEWMV) return 0;
4812
4813 // Avoid pruning the compound mode when best single mode is not available
4814 if (prune_comp_using_best_single_mode_ref == 1)
4815 if (single_mode == MB_MODE_COUNT) return 0;
4816 return 1;
4817 }
4818
// qsort() comparator ordering int64_t values ascending; returns -1/0/1.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Branch-free three-way comparison; avoids overflow of lhs - rhs.
  return (lhs > rhs) - (lhs < rhs);
}
4830
// Records a new best inter mode: copies its RD stats, mbmi and transform
// bookkeeping into the search state and the pick-mode context.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Force skip_txfm to 0 for intra best modes.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve the per-4x4 transform-skip decisions and tx types for reuse
  // when this mode is finally coded.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4865
4866 // Find the best RD for a reference frame (among single reference modes)
4867 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4868 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4869 assert(ref_frame_rd[0] == INT64_MAX);
4870 int64_t ref_copy[REF_FRAMES - 1];
4871 memcpy(ref_copy, ref_frame_rd + 1,
4872 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4873 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4874
4875 int64_t cutoff = ref_copy[0];
4876 // The cut-off is within 10% of the best.
4877 if (cutoff != INT64_MAX) {
4878 assert(cutoff < INT64_MAX / 200);
4879 cutoff = (110 * cutoff) / 100;
4880 }
4881 ref_frame_rd[0] = cutoff;
4882 }
4883
4884 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4885 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4886 MV_REFERENCE_FRAME frame1,
4887 MV_REFERENCE_FRAME frame2) {
4888 assert(frame2 > 0);
4889 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4890 ref_frame_rd[frame2] <= ref_frame_rd[0];
4891 }
4892
// For each stored winner candidate that was searched with simple translation
// only, re-run the motion-mode RD search (via motion_mode_rd) and update the
// best-mode search state when a better RD cost is found. Compound candidates
// are skipped.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the coding state recorded for this candidate during the
    // initial (simple-translation) search.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    // Snapshot the current destination buffers so motion_mode_rd can restore
    // them between trials.
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Select prediction reference buffers for this candidate's ref frames.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4970
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when motion-mode search should be skipped for the
  // current mode (written by skip_inter_mode).
  int *skip_motion_mode;
  // Masks of prediction modes / reference frames to skip.
  mode_skip_mask_t *mode_skip_mask;
  // Overall inter-mode search state (best mode so far, single-mode stats).
  InterModeSearchState *search_state;
  // Mask of reference frames to skip; passed to
  // inter_mode_search_order_independent_skip().
  int skip_ref_frame_mask;
  // Set once the first compound mode has been reached in the mode order,
  // after which single-mode statistics are analyzed for compound pruning.
  int reach_first_comp_mode;
  // Multiplier (in MODE_THRESH_QBITS fixed point) applied to the per-mode
  // threshold when the best mode so far is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far.
  int num_single_modes_processed;
  // Set once find_top_ref() has prepared the single-reference cut-off used
  // to prune compound modes.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4984
// Decides whether the inter mode at position 'midx' of the default mode
// order can be skipped, applying the speed-feature pruning cascade in order.
// Returns 1 when the mode should be skipped, 0 when it must be evaluated.
// Also updates pruning bookkeeping in 'args' as side effects.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Only inter modes are handled by this function.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: evaluate it but skip its
  // motion-mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    // Prune NEWMV (large blocks) and NEARMV against GOLDEN when it has not
    // been useful recently and is not the current best reference.
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5116
// Tracks the best RD costs per reference-mode category (single, compound,
// hybrid select) for the REFERENCE_MODE decision.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  int64_t single_rate, hybrid_rate;

  if (reference_mode == REFERENCE_MODE_SELECT) {
    // rd_stats->rate already includes the reference-mode signalling cost.
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Update the category matching this block's prediction type.
  const REFERENCE_MODE pred_type =
      comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
5144
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// Outputs: search_state/rd_cost/ctx updated with the best mode found;
// *yrd receives the luma RD cost of the best candidate in this partition.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Order the candidates by estimated RD cost.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once a candidate's estimate is much worse than the top estimate;
    // the list is sorted, so all remaining candidates are worse too.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    if (num_tx_cands > num_mode_thresh) {
      // Past the threshold, allow at most one tx search per prediction mode
      // (two for NEARESTMV).
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): this flag is never set to true in this function, so the
    // predictor below is always rebuilt -- confirm whether a caching path was
    // intended here.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD cost, including the skip-txfm signalling.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5331
// Indicates number of winner simple translation modes to be used
// (presumably indexed by the motion_mode_for_winner_cand speed-feature
// level 0..2 -- TODO confirm against its use site).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5334
5335 // Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5336 // speed feature. This list consists of modes that have only searched
5337 // SIMPLE_TRANSLATION. The final list will be used to search other motion
5338 // modes after the initial RD search.
handle_winner_cand(MB_MODE_INFO * const mbmi,motion_mode_best_st_candidate * best_motion_mode_cands,int max_winner_motion_mode_cand,int64_t this_rd,motion_mode_candidate * motion_mode_cand,int skip_motion_mode)5339 static void handle_winner_cand(
5340 MB_MODE_INFO *const mbmi,
5341 motion_mode_best_st_candidate *best_motion_mode_cands,
5342 int max_winner_motion_mode_cand, int64_t this_rd,
5343 motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5344 // Number of current motion mode candidates in list
5345 const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5346 int valid_motion_mode_cand_loc = num_motion_mode_cand;
5347
5348 // find the best location to insert new motion mode candidate
5349 for (int j = 0; j < num_motion_mode_cand; j++) {
5350 if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5351 valid_motion_mode_cand_loc = j;
5352 break;
5353 }
5354 }
5355
5356 // Insert motion mode if location is found
5357 if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5358 if (num_motion_mode_cand > 0 &&
5359 valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5360 memmove(
5361 &best_motion_mode_cands
5362 ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5363 &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5364 (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5365 valid_motion_mode_cand_loc) *
5366 sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5367 motion_mode_cand->mbmi = *mbmi;
5368 motion_mode_cand->rd_cost = this_rd;
5369 motion_mode_cand->skip_motion_mode = skip_motion_mode;
5370 best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5371 *motion_mode_cand;
5372 best_motion_mode_cands->num_motion_mode_cand =
5373 AOMMIN(max_winner_motion_mode_cand,
5374 best_motion_mode_cands->num_motion_mode_cand + 1);
5375 }
5376 }
5377
5378 /*!\brief Search intra modes in interframes
5379 *
5380 * \ingroup intra_mode_search
5381 *
5382 * This function searches for the best intra mode when the current frame is an
5383 * interframe. This function however does *not* handle luma palette mode.
5384 * Palette mode is currently handled by \ref av1_search_palette_mode.
5385 *
5386 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
5388 * for the best chroma mode. Because palette mode is currently not handled by
5389 * here, a cache of uv mode is stored in
5390 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5391 * av1_search_palette_mode.
5392 *
 * \param[in,out]    search_state         Struct keeping track of the
 *                                        prediction mode search state in
 *                                        interframe.
5395 *
5396 * \param[in] cpi Top-level encoder structure.
5397 * \param[in,out] x Pointer to struct holding all the data for
5398 * the current prediction block.
5399 * \param[out] rd_cost Stores the best rd_cost among all the
5400 * prediction modes searched.
5401 * \param[in] bsize Current block size.
 * \param[in,out]    ctx                  Structure to hold the number of 4x4
 *                                        blks to copy the tx_type and txfm_skip
 *                                        arrays for the Y plane only.
5405 * \param[in] sf_args Stores the list of intra mode candidates
5406 * to be searched.
5407 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5408 * current ref frame is an intra frame.
5409 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5410 * terminate chroma intra mode search.
5411 *
5412 * \remark If a new best mode is found, search_state and rd_costs are updated
5413 * correspondingly. While x is also modified, it is only used as a temporary
5414 * buffer, and the final decisions are stored in search_state.
5415 */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Bookkeeping for the best luma-only intra mode found by the loop below.
  // is_best_y_mode_intra is set when any valid luma mode beats yrd_threshold.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Stop the luma search as soon as intra evaluation has been flagged for
    // skipping (the flag is re-checked each iteration since it may change
    // while the loop runs).
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip modes disabled by configuration or speed features: smooth family,
    // Paeth, and non-zero angle deltas when angle delta is not allowed.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    // Evaluate this luma mode; av1_handle_intra_y_mode reports whether a
    // valid RD result was produced and returns its luma rd in intra_rd_y.
    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        // New best luma mode: snapshot its stats, mbmi and per-block tx data
        // so they can be restored after the loop.
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma mode beat yrd_threshold: leave search_state/rd_cost untouched.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    // NOTE: intra_rd_stats.rate was already formed from the pre-subtraction
    // value; this adjustment applies only to the tokenonly stats reported to
    // store_winner_mode_stats()/update_search_state() below.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5596
5597 #if !CONFIG_REALTIME_ONLY
5598 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5599 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5600 static AOM_INLINE void calculate_cost_from_tpl_data(
5601 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5602 int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5603 const AV1_COMMON *const cm = &cpi->common;
5604 // Only consider full SB.
5605 const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5606 const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5607 const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5608 (block_size_high[sb_size] / tpl_bsize_1d);
5609 SuperBlockEnc *sb_enc = &x->sb_enc;
5610 if (sb_enc->tpl_data_count == len) {
5611 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5612 const int tpl_stride = sb_enc->tpl_stride;
5613 const int tplw = mi_size_wide[tpl_bsize];
5614 const int tplh = mi_size_high[tpl_bsize];
5615 const int nw = mi_size_wide[bsize] / tplw;
5616 const int nh = mi_size_high[bsize] / tplh;
5617 if (nw >= 1 && nh >= 1) {
5618 const int of_h = mi_row % mi_size_high[sb_size];
5619 const int of_w = mi_col % mi_size_wide[sb_size];
5620 const int start = of_h / tplh * tpl_stride + of_w / tplw;
5621
5622 for (int k = 0; k < nh; k++) {
5623 for (int l = 0; l < nw; l++) {
5624 *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5625 *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5626 }
5627 }
5628 *inter_cost /= nw * nh;
5629 *intra_cost /= nw * nh;
5630 }
5631 }
5632 }
5633 #endif // !CONFIG_REALTIME_ONLY
5634
5635 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5636 // intra mode search.
skip_intra_modes_in_interframe(AV1_COMMON * const cm,struct macroblock * x,BLOCK_SIZE bsize,InterModeSearchState * search_state,const SPEED_FEATURES * const sf,int64_t inter_cost,int64_t intra_cost)5637 static AOM_INLINE void skip_intra_modes_in_interframe(
5638 AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5639 InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5640 int64_t inter_cost, int64_t intra_cost) {
5641 MACROBLOCKD *const xd = &x->e_mbd;
5642 const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5643 if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5644 bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5645 const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5646 const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5647 if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5648 x->source_variance > 128) {
5649 search_state->intra_search_state.skip_intra_modes = 1;
5650 return;
5651 }
5652 }
5653
5654 const unsigned int src_var_thresh_intra_skip = 1;
5655 const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5656 if (!(skip_intra_in_interframe &&
5657 (x->source_variance > src_var_thresh_intra_skip)))
5658 return;
5659
5660 // Prune intra search based on best inter mode being transfrom skip.
5661 if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5662 const int qindex_thresh[2] = { 200, MAXQ };
5663 const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5664 if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5665 (x->qindex <= qindex_thresh[ind])) {
5666 search_state->intra_search_state.skip_intra_modes = 1;
5667 return;
5668 } else if ((skip_intra_in_interframe >= 4) &&
5669 (inter_cost < 0 || intra_cost < 0)) {
5670 search_state->intra_search_state.skip_intra_modes = 1;
5671 return;
5672 }
5673 }
5674 // Use ML model to prune intra search.
5675 if (inter_cost >= 0 && intra_cost >= 0) {
5676 const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5677 ? &av1_intrap_nn_config
5678 : &av1_intrap_hd_nn_config;
5679 float nn_features[6];
5680 float scores[2] = { 0.0f };
5681
5682 nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5683 nn_features[1] = (float)mi_size_wide_log2[bsize];
5684 nn_features[2] = (float)mi_size_high_log2[bsize];
5685 nn_features[3] = (float)intra_cost;
5686 nn_features[4] = (float)inter_cost;
5687 const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5688 const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5689 nn_features[5] = (float)(ac_q_max / ac_q);
5690
5691 av1_nn_predict(nn_features, nn_config, 1, scores);
5692
5693 // For two parameters, the max prob returned from av1_nn_softmax equals
5694 // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
5695 // calling of av1_nn_softmax.
5696 const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5697 assert(skip_intra_in_interframe <= 5);
5698 if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5699 search_state->intra_search_state.skip_intra_modes = 1;
5700 }
5701 }
5702 }
5703
skip_interp_filter_search(const AV1_COMP * cpi,int is_single_pred)5704 static AOM_INLINE bool skip_interp_filter_search(const AV1_COMP *cpi,
5705 int is_single_pred) {
5706 const MODE encoding_mode = cpi->oxcf.mode;
5707 if (encoding_mode == REALTIME) {
5708 return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5709 (cpi->sf.interp_sf.skip_interp_filter_search ||
5710 cpi->sf.winner_mode_sf.winner_mode_ifs));
5711 } else if (encoding_mode == GOOD) {
5712 // Skip interpolation filter search for single prediction modes.
5713 return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5714 }
5715 return false;
5716 }
5717
get_block_temp_var(const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize)5718 static AOM_INLINE int get_block_temp_var(const AV1_COMP *cpi,
5719 const MACROBLOCK *x,
5720 BLOCK_SIZE bsize) {
5721 const AV1_COMMON *const cm = &cpi->common;
5722 const SPEED_FEATURES *const sf = &cpi->sf;
5723
5724 if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5725 !sf->rt_sf.short_circuit_low_temp_var ||
5726 !sf->rt_sf.prune_inter_modes_using_temp_var) {
5727 return 0;
5728 }
5729
5730 const int mi_row = x->e_mbd.mi_row;
5731 const int mi_col = x->e_mbd.mi_col;
5732 int is_low_temp_var = 0;
5733
5734 if (cm->seq_params->sb_size == BLOCK_64X64)
5735 is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5736 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5737 else
5738 is_low_temp_var = av1_get_force_skip_low_temp_var(
5739 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5740
5741 return is_low_temp_var;
5742 }
5743
5744 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5745 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5746 struct macroblock *x, struct RD_STATS *rd_cost,
5747 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5748 int64_t best_rd_so_far) {
5749 AV1_COMMON *const cm = &cpi->common;
5750 const FeatureFlags *const features = &cm->features;
5751 const int num_planes = av1_num_planes(cm);
5752 const SPEED_FEATURES *const sf = &cpi->sf;
5753 MACROBLOCKD *const xd = &x->e_mbd;
5754 MB_MODE_INFO *const mbmi = xd->mi[0];
5755 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5756 int i;
5757 const ModeCosts *mode_costs = &x->mode_costs;
5758 const int *comp_inter_cost =
5759 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5760
5761 InterModeSearchState search_state;
5762 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5763 INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5764 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5765 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5766 };
5767 HandleInterModeArgs args = { { NULL },
5768 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5769 { NULL },
5770 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5771 MAX_SB_SIZE >> 1 },
5772 NULL,
5773 NULL,
5774 NULL,
5775 search_state.modelled_rd,
5776 INT_MAX,
5777 INT_MAX,
5778 search_state.simple_rd,
5779 0,
5780 false,
5781 interintra_modes,
5782 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5783 { { 0, 0 } },
5784 { 0 },
5785 0,
5786 0,
5787 -1,
5788 -1,
5789 -1,
5790 { 0 },
5791 { 0 },
5792 UINT_MAX };
5793 // Currently, is_low_temp_var is used in real time encoding.
5794 const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5795
5796 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5797 // Indicates the appropriate number of simple translation winner modes for
5798 // exhaustive motion mode evaluation
5799 const int max_winner_motion_mode_cand =
5800 num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5801 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5802 motion_mode_candidate motion_mode_cand;
5803 motion_mode_best_st_candidate best_motion_mode_cands;
5804 // Initializing the number of motion mode candidates to zero.
5805 best_motion_mode_cands.num_motion_mode_cand = 0;
5806 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5807 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5808
5809 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5810
5811 av1_invalid_rd_stats(rd_cost);
5812
5813 for (i = 0; i < REF_FRAMES; ++i) {
5814 x->warp_sample_info[i].num = -1;
5815 }
5816
5817 // Ref frames that are selected by square partition blocks.
5818 int picked_ref_frames_mask = 0;
5819 if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5820 mbmi->partition != PARTITION_NONE) {
5821 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5822 // partition blocks. prune_ref_frame_for_rect_partitions >=2
5823 // implies prune for vert, horiz and extended partition blocks.
5824 if ((mbmi->partition != PARTITION_VERT &&
5825 mbmi->partition != PARTITION_HORZ) ||
5826 sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5827 picked_ref_frames_mask =
5828 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5829 }
5830 }
5831
5832 #if CONFIG_COLLECT_COMPONENT_TIMING
5833 start_timing(cpi, set_params_rd_pick_inter_mode_time);
5834 #endif
5835 // Skip ref frames that never selected by square blocks.
5836 const int skip_ref_frame_mask =
5837 picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5838 mode_skip_mask_t mode_skip_mask;
5839 unsigned int ref_costs_single[REF_FRAMES];
5840 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5841 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5842 // init params, set frame modes, speed features
5843 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5844 skip_ref_frame_mask, ref_costs_single,
5845 ref_costs_comp, yv12_mb);
5846 #if CONFIG_COLLECT_COMPONENT_TIMING
5847 end_timing(cpi, set_params_rd_pick_inter_mode_time);
5848 #endif
5849
5850 int64_t best_est_rd = INT64_MAX;
5851 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5852 // If do_tx_search is 0, only estimated RD should be computed.
5853 // If do_tx_search is 1, all modes have TX search performed.
5854 const int do_tx_search =
5855 !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5856 (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5857 num_pels_log2_lookup[bsize] > 8));
5858 InterModesInfo *inter_modes_info = x->inter_modes_info;
5859 inter_modes_info->num = 0;
5860
5861 // Temporary buffers used by handle_inter_mode().
5862 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5863
5864 // The best RD found for the reference frame, among single reference modes.
5865 // Note that the 0-th element will contain a cut-off that is later used
5866 // to determine if we should skip a compound mode.
5867 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5868 INT64_MAX, INT64_MAX, INT64_MAX,
5869 INT64_MAX, INT64_MAX };
5870
5871 // Prepared stats used later to check if we could skip intra mode eval.
5872 int64_t inter_cost = -1;
5873 int64_t intra_cost = -1;
5874 // Need to tweak the threshold for hdres speed 0 & 1.
5875 const int mi_row = xd->mi_row;
5876 const int mi_col = xd->mi_col;
5877
5878 // Obtain the relevant tpl stats for pruning inter modes
5879 PruneInfoFromTpl inter_cost_info_from_tpl;
5880 #if !CONFIG_REALTIME_ONLY
5881 if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5882 // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5883 // prune_ref_by_selective_ref_frame()
5884 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
5885 // prune_ref_by_selective_ref_frame()
5886 // Populating valid_refs[idx] = 1 ensures that
5887 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5888 // pruned ref frame.
5889 int valid_refs[INTER_REFS_PER_FRAME];
5890 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5891 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5892 valid_refs[frame - 1] =
5893 x->tpl_keep_ref_frame[frame] ||
5894 !prune_ref_by_selective_ref_frame(
5895 cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5896 }
5897 av1_zero(inter_cost_info_from_tpl);
5898 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5899 &inter_cost_info_from_tpl);
5900 }
5901
5902 const int do_pruning =
5903 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5904 if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5905 cpi->oxcf.algo_cfg.enable_tpl_model)
5906 calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5907 &intra_cost);
5908 #endif // !CONFIG_REALTIME_ONLY
5909
5910 // Initialize best mode stats for winner mode processing.
5911 const int max_winner_mode_count =
5912 winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5913 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5914 x->winner_mode_count = 0;
5915 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5916 NULL, bsize, best_rd_so_far,
5917 sf->winner_mode_sf.multi_winner_mode_type, 0);
5918
5919 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5920 if (sf->inter_sf.prune_inter_modes_if_skippable) {
5921 // Higher multiplication factor values for lower quantizers.
5922 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5923 }
5924
5925 // Initialize arguments for mode loop speed features
5926 InterModeSFArgs sf_args = { &args.skip_motion_mode,
5927 &mode_skip_mask,
5928 &search_state,
5929 skip_ref_frame_mask,
5930 0,
5931 mode_thresh_mul_fact,
5932 0,
5933 0 };
5934 int64_t best_inter_yrd = INT64_MAX;
5935
5936 // This is the main loop of this function. It loops over all possible inter
5937 // modes and calls handle_inter_mode() to compute the RD for each.
5938 // Here midx is just an iterator index that should not be used by itself
5939 // except to keep track of the number of modes searched. It should be used
5940 // with av1_default_mode_order to get the enum that defines the mode, which
5941 // can be used with av1_mode_defs to get the prediction mode and the ref
5942 // frames.
5943 // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5944 // good speedup for real time case. If we decide to use compound mode in real
5945 // time, maybe we can modify av1_default_mode_order table.
5946 THR_MODES mode_start = THR_INTER_MODE_START;
5947 THR_MODES mode_end = THR_INTER_MODE_END;
5948 const CurrentFrame *const current_frame = &cm->current_frame;
5949 if (current_frame->reference_mode == SINGLE_REFERENCE) {
5950 mode_start = SINGLE_REF_MODE_START;
5951 mode_end = SINGLE_REF_MODE_END;
5952 }
5953
5954 for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5955 // Get the actual prediction mode we are trying in this iteration
5956 const THR_MODES mode_enum = av1_default_mode_order[midx];
5957 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5958 const PREDICTION_MODE this_mode = mode_def->mode;
5959 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5960
5961 const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5962 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5963 const int is_single_pred =
5964 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5965 const int comp_pred = second_ref_frame > INTRA_FRAME;
5966
5967 init_mbmi(mbmi, this_mode, ref_frames, cm);
5968
5969 txfm_info->skip_txfm = 0;
5970 sf_args.num_single_modes_processed += is_single_pred;
5971 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5972 #if CONFIG_COLLECT_COMPONENT_TIMING
5973 start_timing(cpi, skip_inter_mode_time);
5974 #endif
5975 // Apply speed features to decide if this inter mode can be skipped
5976 const int is_skip_inter_mode = skip_inter_mode(
5977 cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5978 #if CONFIG_COLLECT_COMPONENT_TIMING
5979 end_timing(cpi, skip_inter_mode_time);
5980 #endif
5981 if (is_skip_inter_mode) continue;
5982
5983 // Select prediction reference frames.
5984 for (i = 0; i < num_planes; i++) {
5985 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5986 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5987 }
5988
5989 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5990 mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5991 mbmi->filter_intra_mode_info.use_filter_intra = 0;
5992 mbmi->ref_mv_idx = 0;
5993
5994 const int64_t ref_best_rd = search_state.best_rd;
5995 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5996 av1_init_rd_stats(&rd_stats);
5997
5998 const int ref_frame_cost = comp_pred
5999 ? ref_costs_comp[ref_frame][second_ref_frame]
6000 : ref_costs_single[ref_frame];
6001 const int compmode_cost =
6002 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6003 const int real_compmode_cost =
6004 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6005 ? compmode_cost
6006 : 0;
6007 // Point to variables that are maintained between loop iterations
6008 args.single_newmv = search_state.single_newmv;
6009 args.single_newmv_rate = search_state.single_newmv_rate;
6010 args.single_newmv_valid = search_state.single_newmv_valid;
6011 args.single_comp_cost = real_compmode_cost;
6012 args.ref_frame_cost = ref_frame_cost;
6013 args.best_pred_sse = search_state.best_pred_sse;
6014 args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6015
6016 int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6017 search_state.best_skip_rd[1] };
6018 int64_t this_yrd = INT64_MAX;
6019 #if CONFIG_COLLECT_COMPONENT_TIMING
6020 start_timing(cpi, handle_inter_mode_time);
6021 #endif
6022 int64_t this_rd = handle_inter_mode(
6023 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6024 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6025 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6026 &this_yrd);
6027 #if CONFIG_COLLECT_COMPONENT_TIMING
6028 end_timing(cpi, handle_inter_mode_time);
6029 #endif
6030 if (current_frame->reference_mode != SINGLE_REFERENCE) {
6031 if (!args.skip_ifs &&
6032 sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6033 is_inter_singleref_mode(this_mode)) {
6034 collect_single_states(x, &search_state, mbmi);
6035 }
6036
6037 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6038 is_inter_singleref_mode(this_mode))
6039 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6040 }
6041
6042 if (this_rd == INT64_MAX) continue;
6043
6044 if (mbmi->skip_txfm) {
6045 rd_stats_y.rate = 0;
6046 rd_stats_uv.rate = 0;
6047 }
6048
6049 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6050 this_rd < ref_frame_rd[ref_frame]) {
6051 ref_frame_rd[ref_frame] = this_rd;
6052 }
6053
6054 // Did this mode help, i.e., is it the new best mode
6055 if (this_rd < search_state.best_rd) {
6056 assert(IMPLIES(comp_pred,
6057 cm->current_frame.reference_mode != SINGLE_REFERENCE));
6058 search_state.best_pred_sse = x->pred_sse[ref_frame];
6059 best_inter_yrd = this_yrd;
6060 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6061 &rd_stats_uv, mode_enum, x, do_tx_search);
6062 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6063 // skip_rd[0] is the best total rd for a skip mode so far.
6064 // skip_rd[1] is the best total rd for a skip mode so far in luma.
6065 // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6066 // When do_tx_search = 0, skip_rd[1] is updated.
6067 search_state.best_skip_rd[1] = skip_rd[1];
6068 }
6069 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6070 // Add this mode to motion mode candidate list for motion mode search
6071 // if using motion_mode_for_winner_cand speed feature
6072 handle_winner_cand(mbmi, &best_motion_mode_cands,
6073 max_winner_motion_mode_cand, this_rd,
6074 &motion_mode_cand, args.skip_motion_mode);
6075 }
6076
6077 /* keep record of best compound/single-only prediction */
6078 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6079 x->rdmult, &search_state, compmode_cost);
6080 }
6081
6082 #if CONFIG_COLLECT_COMPONENT_TIMING
6083 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6084 #endif
6085 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6086 // For the single ref winner candidates, evaluate other motion modes (non
6087 // simple translation).
6088 evaluate_motion_mode_for_winner_candidates(
6089 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6090 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6091 &search_state, &best_inter_yrd);
6092 }
6093 #if CONFIG_COLLECT_COMPONENT_TIMING
6094 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6095 #endif
6096
6097 #if CONFIG_COLLECT_COMPONENT_TIMING
6098 start_timing(cpi, do_tx_search_time);
6099 #endif
6100 if (do_tx_search != 1) {
6101 // A full tx search has not yet been done, do tx search for
6102 // top mode candidates
6103 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6104 yv12_mb, mi_row, mi_col, &search_state,
6105 rd_cost, ctx, &best_inter_yrd);
6106 }
6107 #if CONFIG_COLLECT_COMPONENT_TIMING
6108 end_timing(cpi, do_tx_search_time);
6109 #endif
6110
6111 #if CONFIG_COLLECT_COMPONENT_TIMING
6112 start_timing(cpi, handle_intra_mode_time);
6113 #endif
6114 // Gate intra mode evaluation if best of inter is skip except when source
6115 // variance is extremely low and also based on max intra bsize.
6116 skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6117 intra_cost);
6118
6119 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6120 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6121 &sf_args, intra_ref_frame_cost,
6122 best_inter_yrd);
6123 #if CONFIG_COLLECT_COMPONENT_TIMING
6124 end_timing(cpi, handle_intra_mode_time);
6125 #endif
6126
6127 #if CONFIG_COLLECT_COMPONENT_TIMING
6128 start_timing(cpi, refine_winner_mode_tx_time);
6129 #endif
6130 int winner_mode_count =
6131 sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6132 // In effect only when fast tx search speed features are enabled.
6133 refine_winner_mode_tx(
6134 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6135 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6136 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6137 #if CONFIG_COLLECT_COMPONENT_TIMING
6138 end_timing(cpi, refine_winner_mode_tx_time);
6139 #endif
6140
6141 // Initialize default mode evaluation params
6142 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6143
6144 // Only try palette mode when the best mode so far is an intra mode.
6145 const int try_palette =
6146 cpi->oxcf.tool_cfg.enable_palette &&
6147 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6148 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6149 RD_STATS this_rd_cost;
6150 int this_skippable = 0;
6151 if (try_palette) {
6152 #if CONFIG_COLLECT_COMPONENT_TIMING
6153 start_timing(cpi, av1_search_palette_mode_time);
6154 #endif
6155 this_skippable = av1_search_palette_mode(
6156 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6157 ctx, &this_rd_cost, search_state.best_rd);
6158 #if CONFIG_COLLECT_COMPONENT_TIMING
6159 end_timing(cpi, av1_search_palette_mode_time);
6160 #endif
6161 if (this_rd_cost.rdcost < search_state.best_rd) {
6162 search_state.best_mode_index = THR_DC;
6163 mbmi->mv[0].as_int = 0;
6164 rd_cost->rate = this_rd_cost.rate;
6165 rd_cost->dist = this_rd_cost.dist;
6166 rd_cost->rdcost = this_rd_cost.rdcost;
6167 search_state.best_rd = rd_cost->rdcost;
6168 search_state.best_mbmode = *mbmi;
6169 search_state.best_skip2 = 0;
6170 search_state.best_mode_skippable = this_skippable;
6171 memcpy(ctx->blk_skip, txfm_info->blk_skip,
6172 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6173 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6174 }
6175 }
6176
6177 search_state.best_mbmode.skip_mode = 0;
6178 if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6179 is_comp_ref_allowed(bsize)) {
6180 const struct segmentation *const seg = &cm->seg;
6181 unsigned char segment_id = mbmi->segment_id;
6182 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6183 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6184 }
6185 }
6186
6187 // Make sure that the ref_mv_idx is only nonzero when we're
6188 // using a mode which can support ref_mv_idx
6189 if (search_state.best_mbmode.ref_mv_idx != 0 &&
6190 !(search_state.best_mbmode.mode == NEWMV ||
6191 search_state.best_mbmode.mode == NEW_NEWMV ||
6192 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6193 search_state.best_mbmode.ref_mv_idx = 0;
6194 }
6195
6196 if (search_state.best_mode_index == THR_INVALID ||
6197 search_state.best_rd >= best_rd_so_far) {
6198 rd_cost->rate = INT_MAX;
6199 rd_cost->rdcost = INT64_MAX;
6200 return;
6201 }
6202
6203 const InterpFilter interp_filter = features->interp_filter;
6204 assert((interp_filter == SWITCHABLE) ||
6205 (interp_filter ==
6206 search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6207 !is_inter_block(&search_state.best_mbmode));
6208 assert((interp_filter == SWITCHABLE) ||
6209 (interp_filter ==
6210 search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6211 !is_inter_block(&search_state.best_mbmode));
6212
6213 if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6214 av1_update_rd_thresh_fact(
6215 cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6216 search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6217 }
6218
6219 // macroblock modes
6220 *mbmi = search_state.best_mbmode;
6221 txfm_info->skip_txfm |= search_state.best_skip2;
6222
6223 // Note: this section is needed since the mode may have been forced to
6224 // GLOBALMV by the all-zero mode handling of ref-mv.
6225 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6226 // Correct the interp filters for GLOBALMV
6227 if (is_nontrans_global_motion(xd, xd->mi[0])) {
6228 int_interpfilters filters =
6229 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6230 assert(mbmi->interp_filters.as_int == filters.as_int);
6231 (void)filters;
6232 }
6233 }
6234
6235 txfm_info->skip_txfm |= search_state.best_mode_skippable;
6236
6237 assert(search_state.best_mode_index != THR_INVALID);
6238
6239 #if CONFIG_INTERNAL_STATS
6240 store_coding_context(x, ctx, search_state.best_mode_index,
6241 search_state.best_mode_skippable);
6242 #else
6243 store_coding_context(x, ctx, search_state.best_mode_skippable);
6244 #endif // CONFIG_INTERNAL_STATS
6245
6246 if (mbmi->palette_mode_info.palette_size[1] > 0) {
6247 assert(try_palette);
6248 av1_restore_uv_color_map(cpi, x);
6249 }
6250 }
6251
/*!\brief RD mode decision for a block in a segment with SEG_LVL_SKIP active.
 *
 * Blocks in a skip segment are coded as single-reference GLOBALMV with no
 * residual, so the only remaining decisions are the interpolation filter and
 * the (rate-only) cost of signalling the forced configuration.
 *
 * \param[in]   cpi             Top-level encoder structure.
 * \param[in]   tile_data       Tile data (unused here).
 * \param[in]   x               Macroblock context; xd->mi[0] is written in
 *                              place with the chosen mode info.
 * \param[in]   mi_row          Block row position in mi units.
 * \param[in]   mi_col          Block column position in mi units.
 * \param[out]  rd_cost         Receives the rate/distortion of the decision;
 *                              marked invalid (INT_MAX/INT64_MAX) when the
 *                              result cannot beat \p best_rd_so_far.
 * \param[in]   bsize           Current block size.
 * \param[in]   ctx             Mode context used to store the coding decision.
 * \param[in]   best_rd_so_far  Best RD cost found by earlier searches.
 */
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // A skip-segment block carries no residual, so distortion is zero by
  // construction and the RD cost is purely signalling rate.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the only legal coding configuration for a skip segment:
  // GLOBALMV / simple translation, no palette or filter-intra, DC chroma.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // The reference frame is either fixed by the segment feature data or
  // defaults to LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector comes straight from the global motion model of the
  // selected reference.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  // NOTE: the original code re-assigned SIMPLE_TRANSLATION here a second
  // time; the duplicate assignment has been removed (nothing touches
  // mbmi->motion_mode in between).
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the filter that is cheapest to signal in
    // the current entropy context; prediction output is not evaluated here.
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): this always charges LAST_FRAME even when the segment
  // fixes a different reference — matches long-standing behavior; confirm
  // before changing.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6383
/*!\cond */
// State bundle handed to the per-neighbour OBMC weighting callbacks
// (calc_target_weighted_pred_above/left) through the void *fun_ctxt argument
// of the foreach_overlappable_nb_* iterators.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // destination wsrc/mask accumulators
  const uint8_t *tmp;             // neighbour prediction buffer being blended
  int tmp_stride;                 // stride of tmp, in pixels
  int overlap;                    // blending overlap extent, in pixels
};
/*!\endcond */
6392
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6393 static INLINE void calc_target_weighted_pred_above(
6394 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6395 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6396 (void)nb_mi;
6397 (void)num_planes;
6398 (void)rel_mi_row;
6399 (void)dir;
6400
6401 struct calc_target_weighted_pred_ctxt *ctxt =
6402 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6403
6404 const int bw = xd->width << MI_SIZE_LOG2;
6405 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6406
6407 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6408 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6409 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6410 const int is_hbd = is_cur_buf_hbd(xd);
6411
6412 if (!is_hbd) {
6413 for (int row = 0; row < ctxt->overlap; ++row) {
6414 const uint8_t m0 = mask1d[row];
6415 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6416 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6417 wsrc[col] = m1 * tmp[col];
6418 mask[col] = m0;
6419 }
6420 wsrc += bw;
6421 mask += bw;
6422 tmp += ctxt->tmp_stride;
6423 }
6424 } else {
6425 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6426
6427 for (int row = 0; row < ctxt->overlap; ++row) {
6428 const uint8_t m0 = mask1d[row];
6429 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6430 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6431 wsrc[col] = m1 * tmp16[col];
6432 mask[col] = m0;
6433 }
6434 wsrc += bw;
6435 mask += bw;
6436 tmp16 += ctxt->tmp_stride;
6437 }
6438 }
6439 }
6440
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6441 static INLINE void calc_target_weighted_pred_left(
6442 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6443 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6444 (void)nb_mi;
6445 (void)num_planes;
6446 (void)rel_mi_col;
6447 (void)dir;
6448
6449 struct calc_target_weighted_pred_ctxt *ctxt =
6450 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6451
6452 const int bw = xd->width << MI_SIZE_LOG2;
6453 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6454
6455 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6456 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6457 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6458 const int is_hbd = is_cur_buf_hbd(xd);
6459
6460 if (!is_hbd) {
6461 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6462 for (int col = 0; col < ctxt->overlap; ++col) {
6463 const uint8_t m0 = mask1d[col];
6464 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6465 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6466 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6467 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6468 }
6469 wsrc += bw;
6470 mask += bw;
6471 tmp += ctxt->tmp_stride;
6472 }
6473 } else {
6474 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6475
6476 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6477 for (int col = 0; col < ctxt->overlap; ++col) {
6478 const uint8_t m0 = mask1d[col];
6479 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6480 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6481 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6482 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6483 }
6484 wsrc += bw;
6485 mask += bw;
6486 tmp16 += ctxt->tmp_stride;
6487 }
6488 }
6489 }
6490
6491 // This function has a structure similar to av1_build_obmc_inter_prediction
6492 //
6493 // The OBMC predictor is computed as:
6494 //
6495 // PObmc(x,y) =
6496 // AOM_BLEND_A64(Mh(x),
6497 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6498 // PLeft(x, y))
6499 //
6500 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6501 // rounding, this can be written as:
6502 //
6503 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6504 // Mh(x) * Mv(y) * P(x,y) +
6505 // Mh(x) * Cv(y) * Pabove(x,y) +
6506 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6507 //
6508 // Where :
6509 //
//   Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//   Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6512 //
6513 // This function computes 'wsrc' and 'mask' as:
6514 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      (Mh(x) * Cv(y) * Pabove(x,y) +
//       AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
6519 //
6520 // mask(x, y) = Mh(x) * Mv(y)
6521 //
6522 // These can then be used to efficiently approximate the error for any
6523 // predictor P in the context of the provided neighbouring predictors by
6524 // computing:
6525 //
6526 // error(x, y) =
6527 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6528 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6529 static AOM_INLINE void calc_target_weighted_pred(
6530 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6531 const uint8_t *above, int above_stride, const uint8_t *left,
6532 int left_stride) {
6533 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6534 const int bw = xd->width << MI_SIZE_LOG2;
6535 const int bh = xd->height << MI_SIZE_LOG2;
6536 const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6537 int32_t *mask_buf = obmc_buffer->mask;
6538 int32_t *wsrc_buf = obmc_buffer->wsrc;
6539
6540 const int is_hbd = is_cur_buf_hbd(xd);
6541 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6542
6543 // plane 0 should not be sub-sampled
6544 assert(xd->plane[0].subsampling_x == 0);
6545 assert(xd->plane[0].subsampling_y == 0);
6546
6547 av1_zero_array(wsrc_buf, bw * bh);
6548 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6549
6550 // handle above row
6551 if (xd->up_available) {
6552 const int overlap =
6553 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6554 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6555 above_stride, overlap };
6556 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6557 max_neighbor_obmc[mi_size_wide_log2[bsize]],
6558 calc_target_weighted_pred_above, &ctxt);
6559 }
6560
6561 for (int i = 0; i < bw * bh; ++i) {
6562 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6564 }
6565
6566 // handle left column
6567 if (xd->left_available) {
6568 const int overlap =
6569 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6570 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6571 left_stride, overlap };
6572 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6573 max_neighbor_obmc[mi_size_high_log2[bsize]],
6574 calc_target_weighted_pred_left, &ctxt);
6575 }
6576
6577 if (!is_hbd) {
6578 const uint8_t *src = x->plane[0].src.buf;
6579
6580 for (int row = 0; row < bh; ++row) {
6581 for (int col = 0; col < bw; ++col) {
6582 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6583 }
6584 wsrc_buf += bw;
6585 src += x->plane[0].src.stride;
6586 }
6587 } else {
6588 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6589
6590 for (int row = 0; row < bh; ++row) {
6591 for (int col = 0; col < bw; ++col) {
6592 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6593 }
6594 wsrc_buf += bw;
6595 src += x->plane[0].src.stride;
6596 }
6597 }
6598 }
6599