1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73
74 #define LAST_NEW_MV_INDEX 6
75
76 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77 // The values are kept in Q12 format and equation used to derive is
78 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
79 #define MODE_THRESH_QBITS 12
// Entry i is the multiplier for qindex i: 10240 (2.5 in Q12) at qindex 0,
// decreasing linearly to 4096 (1.0 in Q12) at the maximum qindex.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106
// Default evaluation order of the prediction modes during the inter mode
// search. Earlier entries are tried first.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEAREST modes, one entry per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEW (motion search) modes.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEAR modes.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBAL motion modes.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST modes for every reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped by reference-frame pair; each group
  // lists NEAR_NEAR, NEW_NEW, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW
  // and GLOBAL_GLOBAL in that order.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, tried after the inter modes.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299
/*!\cond */
// Record of one evaluated single-reference inter mode.
typedef struct SingleInterModeState {
  int64_t rd;                    // RD cost of the mode
  MV_REFERENCE_FRAME ref_frame;  // reference frame the mode used
  int valid;                     // nonzero once this entry has been filled in
} SingleInterModeState;
306
// Aggregated state carried through the inter mode search: the best candidates
// found so far plus cached per-mode/per-ref results used for pruning.
typedef struct InterModeSearchState {
  int64_t best_rd;           // best overall RD cost found so far
  int64_t best_skip_rd[2];   // best RD with skip; index meaning not shown
                             // here — see usage sites. TODO(review): confirm.
  MB_MODE_INFO best_mbmode;  // mode info of the current best candidate
  int best_rate_y;           // luma rate of the best candidate
  int best_rate_uv;          // chroma rate of the best candidate
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;  // index of the best mode in the mode order
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];  // per-mode RD pruning thresholds
  int64_t best_intra_rd;              // best RD among intra candidates
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Modelled RD estimates indexed by [mode][ref_mv_idx][ref frame].
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];        // best single-ref RD per ref
  PREDICTION_MODE best_single_mode[REF_FRAMES];  // mode achieving it

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // nested intra-search bookkeeping
  RD_STATS best_y_rdcost;                   // luma RD stats of best candidate
} InterModeSearchState;
/*!\endcond */
348
// Resets every per-block-size inter-mode RD model in the tile so that sample
// accumulation starts from scratch.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->sse_sum = 0;
    model->ld_sum = 0;
    model->sse_ld_sum = 0;
    model->sse_sse_sum = 0;
  }
}
361
// Estimates residue rate and distortion for a block from the fitted linear
// RD model. Returns 1 and writes the estimates when the model is ready,
// otherwise returns 0 and leaves the outputs untouched.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  if (sse < model->dist_mean) {
    // Below the mean distortion the model predicts a free (skip-like) block.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    // Near-zero slope would blow up the division below.
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double est_residue_cost_dbl = (sse - model->dist_mean) / est_ld;
    if (est_residue_cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
    }
  }
  if (*est_residue_cost <= 0) {
    // Zero predicted rate implies the distortion is just the SSE itself.
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
// Refits the per-block-size linear RD models (ld = a * sse + b) from the
// samples accumulated since the previous fit, then clears the accumulators.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;  // unused; kept for interface compatibility
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    // Only block sizes tracked by the model have a valid data index.
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    InterModeRdModel *const md = &tile_data->inter_mode_rd_models[bsize];
    // The initial fit needs more samples than subsequent refinements.
    const int min_samples = md->ready ? 64 : 200;
    if (md->num < min_samples) continue;

    if (!md->ready) {
      // First fit: plain sample means.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Refinement: blend the previous means with the new sample means,
      // weighting the history 3:1.
      const double w = 3;
      md->dist_mean = (md->dist_mean * w + (md->dist_sum / md->num)) / (w + 1);
      md->ld_mean = (md->ld_mean * w + (md->ld_sum / md->num)) / (w + 1);
      md->sse_mean = (md->sse_mean * w + (md->sse_sum / md->num)) / (w + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * w + (md->sse_sse_sum / md->num)) / (w + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * w + (md->sse_ld_sum / md->num)) / (w + 1);
    }

    // Least-squares line through the accumulated moments.
    const double mean_ld = md->ld_mean;
    const double mean_sse = md->sse_mean;
    const double rms_sse = sqrt(md->sse_sse_mean);
    const double mean_sse_ld = md->sse_ld_mean;
    md->a = (mean_sse_ld - mean_sse * mean_ld) /
            (rms_sse * rms_sse - mean_sse * mean_sse);
    md->b = mean_ld - md->a * mean_sse;
    md->ready = 1;

    // Restart accumulation for the next fitting window.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445
// Adds one (sse, dist, rate) observation to the RD model accumulators for the
// given block size.
static inline void inter_mode_data_push(TileDataEnc *tile_data,
                                        BLOCK_SIZE bsize, int64_t sse,
                                        int64_t dist, int residue_cost) {
  // Samples with no residue rate or no distortion gap carry no information
  // about the rate/distortion slope.
  if (residue_cost == 0 || sse == dist) return;
  if (inter_mode_data_block_idx(bsize) == -1) return;
  InterModeRdModel *const md = &tile_data->inter_mode_rd_models[bsize];
  if (md->num >= INTER_MODE_RD_DATA_OVERALL_SIZE) return;
  // ld: distortion reduction per unit of residue rate.
  const double ld = (sse - dist) * 1. / residue_cost;
  ++md->num;
  md->dist_sum += dist;
  md->ld_sum += ld;
  md->sse_sum += sse;
  md->sse_sse_sum += (double)sse * (double)sse;
  md->sse_ld_sum += sse * ld;
}
463
// Appends one candidate inter mode's stats to the fixed-capacity arrays in
// inter_modes_info and bumps the element count.
static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
                                         int mode_rate, int64_t sse, int64_t rd,
                                         RD_STATS *rd_cost, RD_STATS *rd_cost_y,
                                         RD_STATS *rd_cost_uv,
                                         const MB_MODE_INFO *mbmi) {
  const int slot = inter_modes_info->num;
  assert(slot < MAX_INTER_MODES);
  inter_modes_info->mbmi_arr[slot] = *mbmi;
  inter_modes_info->mode_rate_arr[slot] = mode_rate;
  inter_modes_info->sse_arr[slot] = sse;
  inter_modes_info->est_rd_arr[slot] = rd;
  inter_modes_info->rd_cost_arr[slot] = *rd_cost;
  inter_modes_info->rd_cost_y_arr[slot] = *rd_cost_y;
  inter_modes_info->rd_cost_uv_arr[slot] = *rd_cost_uv;
  inter_modes_info->num = slot + 1;
}
480
compare_rd_idx_pair(const void * a,const void * b)481 static int compare_rd_idx_pair(const void *a, const void *b) {
482 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
483 // To avoid inconsistency in qsort() ordering when two elements are equal,
484 // using idx as tie breaker. Refer aomedia:2928
485 if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
486 return 0;
487 else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
488 return 1;
489 else
490 return -1;
491 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
492 return 1;
493 } else {
494 return -1;
495 }
496 }
497
// Builds an (idx, est_rd) pair per stored mode and sorts the pairs by
// ascending estimated RD cost.
static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
                                         RdIdxPair *rd_idx_pair_arr) {
  const int count = inter_modes_info->num;
  if (count == 0) return;
  for (int idx = 0; idx < count; ++idx) {
    rd_idx_pair_arr[idx].idx = idx;
    rd_idx_pair_arr[idx].rd = inter_modes_info->est_rd_arr[idx];
  }
  qsort(rd_idx_pair_arr, count, sizeof(*rd_idx_pair_arr),
        compare_rd_idx_pair);
}
510
511 // Similar to get_horver_correlation, but also takes into account first
512 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)513 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
514 int width, int height, float *hcorr,
515 float *vcorr) {
516 // The following notation is used:
517 // x - current pixel
518 // y - left neighbor pixel
519 // z - top neighbor pixel
520 int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
521 int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
522 int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
523
524 // First, process horizontal correlation on just the first row
525 x_sum += diff[0];
526 x2_sum += diff[0] * diff[0];
527 x_firstrow += diff[0];
528 x2_firstrow += diff[0] * diff[0];
529 for (int j = 1; j < width; ++j) {
530 const int16_t x = diff[j];
531 const int16_t y = diff[j - 1];
532 x_sum += x;
533 x_firstrow += x;
534 x2_sum += x * x;
535 x2_firstrow += x * x;
536 xy_sum += x * y;
537 }
538
539 // Process vertical correlation in the first column
540 x_firstcol += diff[0];
541 x2_firstcol += diff[0] * diff[0];
542 for (int i = 1; i < height; ++i) {
543 const int16_t x = diff[i * stride];
544 const int16_t z = diff[(i - 1) * stride];
545 x_sum += x;
546 x_firstcol += x;
547 x2_sum += x * x;
548 x2_firstcol += x * x;
549 xz_sum += x * z;
550 }
551
552 // Now process horiz and vert correlation through the rest unit
553 for (int i = 1; i < height; ++i) {
554 for (int j = 1; j < width; ++j) {
555 const int16_t x = diff[i * stride + j];
556 const int16_t y = diff[i * stride + j - 1];
557 const int16_t z = diff[(i - 1) * stride + j];
558 x_sum += x;
559 x2_sum += x * x;
560 xy_sum += x * y;
561 xz_sum += x * z;
562 }
563 }
564
565 for (int j = 0; j < width; ++j) {
566 x_finalrow += diff[(height - 1) * stride + j];
567 x2_finalrow +=
568 diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
569 }
570 for (int i = 0; i < height; ++i) {
571 x_finalcol += diff[i * stride + width - 1];
572 x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
573 }
574
575 int64_t xhor_sum = x_sum - x_finalcol;
576 int64_t xver_sum = x_sum - x_finalrow;
577 int64_t y_sum = x_sum - x_firstcol;
578 int64_t z_sum = x_sum - x_firstrow;
579 int64_t x2hor_sum = x2_sum - x2_finalcol;
580 int64_t x2ver_sum = x2_sum - x2_finalrow;
581 int64_t y2_sum = x2_sum - x2_firstcol;
582 int64_t z2_sum = x2_sum - x2_firstrow;
583
584 const float num_hor = (float)(height * (width - 1));
585 const float num_ver = (float)((height - 1) * width);
586
587 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
588 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
589
590 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
591 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
592
593 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
594 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
595
596 if (xhor_var_n > 0 && y_var_n > 0) {
597 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
598 *hcorr = *hcorr < 0 ? 0 : *hcorr;
599 } else {
600 *hcorr = 1.0;
601 }
602 if (xver_var_n > 0 && z_var_n > 0) {
603 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
604 *vcorr = *vcorr < 0 ? 0 : *vcorr;
605 } else {
606 *vcorr = 1.0;
607 }
608 }
609
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)610 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
611 int64_t *sse_y) {
612 const AV1_COMMON *cm = &cpi->common;
613 const int num_planes = av1_num_planes(cm);
614 const MACROBLOCKD *xd = &x->e_mbd;
615 const MB_MODE_INFO *mbmi = xd->mi[0];
616 int64_t total_sse = 0;
617 for (int plane = 0; plane < num_planes; ++plane) {
618 if (plane && !xd->is_chroma_ref) break;
619 const struct macroblock_plane *const p = &x->plane[plane];
620 const struct macroblockd_plane *const pd = &xd->plane[plane];
621 const BLOCK_SIZE bs =
622 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
623 unsigned int sse;
624
625 cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
626 pd->dst.stride, &sse);
627 total_sse += sse;
628 if (!plane && sse_y) *sse_y = sse;
629 }
630 total_sse <<= 4;
631 return total_sse;
632 }
633
// Returns the sum of squared differences between coeff[] and dqcoeff[];
// *ssz receives the sum of squared original coefficients.
// Fix: widen the per-element products to 64 bits before squaring, matching
// av1_highbd_block_error_c, so large coefficient magnitudes cannot overflow
// the intermediate 32-bit int products (results are unchanged for in-range
// inputs).
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
648
// Returns the sum of squared differences between coeff[] and dqcoeff[].
// Fix: the difference is widened to 64 bits before squaring — the worst-case
// int16_t difference is +/-65535, whose square (~4.3e9) overflows a 32-bit
// int (undefined behavior). Results are unchanged for in-range inputs.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
660
661 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: sum of squared coefficient differences and
// (via *ssz) sum of squared original coefficients, both accumulated in 64
// bits and then rescaled to the 8-bit-equivalent range with rounding.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (1 << shift) >> 1;
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
681 #endif
682
// Returns 1 when the given fine-angle directional intra mode should be
// skipped because the best intra mode found so far is neither of the two
// related base directions for that mode, 0 otherwise.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
699
// Returns the rate cost of signalling the given inter prediction mode under
// the supplied mode context. Compound modes use a single table lookup;
// single-reference modes accumulate costs down the NEWMV / GLOBALMV / REFMV
// decision tree.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 0: NEWMV vs. the rest.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];

  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  // Level 1: GLOBALMV vs. NEAREST/NEAR.
  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[zeromv_ctx][0];

  cost += mode_costs->zeromv_mode_cost[zeromv_ctx][1];

  // Level 2: NEARESTMV vs. NEARMV.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
  return cost;
}
730
// Extracts the single-reference component mode of a (possibly compound)
// prediction mode: the second reference's mode when ref_idx is nonzero,
// otherwise the first reference's mode.
static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
                                              int ref_idx) {
  if (ref_idx) return compound_ref1_mode(this_mode);
  return compound_ref0_mode(this_mode);
}
736
// Fills ref_costs_single[ref] with the rate cost of signalling each single
// reference frame, and ref_costs_comp[ref0][ref1] with the cost of each
// compound reference pair, using the entropy contexts derived from xd.
// If the segment pins the reference frame (SEG_LVL_REF_FRAME), all costs are
// zero since no reference needs to be coded.
static inline void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is segment-fixed: nothing is signalled, so zero cost.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter ref starts with the cost of the intra/inter flag.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    // Entropy contexts for the single-reference decision tree bits.
    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional pairs also pay the "bidirectional compound" type bit.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction disabled for this frame: assign a flat nominal
      // cost. NOTE(review): presumably these entries should never win the
      // search in SINGLE_REFERENCE mode — confirm at call sites.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
898
// Takes a snapshot of the current coding decision (mode info, skip flags and
// the best-ref MV context) into \p ctx so it can be restored later if this
// coding path is ultimately selected for the block.
static inline void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  // Recorded only for internal statistics gathering.
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
919
// Sets up the prediction buffers for |ref_frame| in yv12_mb, gathers the MV
// reference candidates for the current block, and runs encoder-side MV
// prediction (av1_mv_pred) to choose a good center for later motion searches.
// If a scaled version of the reference exists, the prediction block is
// temporarily pointed at it for the MV prediction step and restored to the
// unscaled reference afterwards.
static inline void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
964
965 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
966 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
967
968 // TODO(jingning): this mv clamping function should be block size dependent.
// Clamp |mv| so the prediction stays within the extended border around the
// current block, derived from the block's distance to the frame edges.
static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  SubpelMvLimits bounds;
  bounds.col_min = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
  bounds.col_max = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
  bounds.row_min = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
  bounds.row_max = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
  clamp_mv(mv, &bounds);
}
977
978 /* If the current mode shares the same mv with other modes with higher cost,
979 * skip this mode. */
// Returns 1 when |this_mode| is guaranteed to yield the same motion vector as
// an already-searched mode whose signaling cost is lower, so the caller can
// skip it. Only single-reference NEARMV/GLOBALMV candidates are considered.
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  // Compound prediction is never pruned here.
  if (ref_frames[1] > INTRA_FRAME) return 0;

  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int is_global_translation =
      cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION;

  // Determine which previously searched mode produces the identical MV.
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (this_mode == NEARMV) {
    if (ref_mv_count == 0) {
      // NEARMV has the same motion vector as NEARESTMV.
      compare_mode = NEARESTMV;
    } else if (ref_mv_count == 1 && is_global_translation) {
      // NEARMV has the same motion vector as GLOBALMV.
      compare_mode = GLOBALMV;
    }
  } else if (this_mode == GLOBALMV) {
    if (ref_mv_count == 0 && is_global_translation) {
      // GLOBALMV has the same motion vector as NEARESTMV.
      compare_mode = NEARESTMV;
    } else if (ref_mv_count == 1) {
      // GLOBALMV has the same motion vector as NEARMV.
      compare_mode = NEARMV;
    }
  }
  if (compare_mode == MB_MODE_COUNT) return 0;

  // Use modelled_rd to check whether the compare mode was actually searched.
  int64_t *const compare_rd =
      &search_state->modelled_rd[compare_mode][0][ref_frames[0]];
  if (*compare_rd == INT64_MAX) return 0;

  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
  const int compare_cost = cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
  const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

  // Only skip if this mode's signaling cost exceeds the compare mode's.
  if (this_cost > compare_cost) {
    search_state->modelled_rd[this_mode][0][ref_frames[0]] = *compare_rd;
    return 1;
  }
  return 0;
}
1035
// Copies |in_mv| into |out_mv|, reduces its precision according to the frame's
// MV precision settings, clamps it to the prediction border, and reports
// whether the resulting full-pel MV lies inside the motion search limits.
static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
                                     const AV1_COMMON *cm,
                                     const MACROBLOCK *x) {
  const FeatureFlags *const features = &cm->features;
  *out_mv = in_mv;
  MV *const mv = &out_mv->as_mv;
  lower_mv_precision(mv, features->allow_high_precision_mv,
                     features->cur_frame_force_integer_mv);
  clamp_mv2(mv, &x->e_mbd);
  return av1_is_fullmv_in_range(&x->mv_limits, get_fullmv_from_mv(mv));
}
1047
1048 // To use single newmv directly for compound modes, need to clamp the mv to the
1049 // valid mv range. Without this, encoder would generate out of range mv, and
1050 // this is seen in 8k encoding.
// Clamps *mv into the subpel search range derived from the reference MV for
// |ref_idx|, so a reused single-prediction NEWMV stays within legal bounds.
static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
                                     int ref_idx) {
  SubpelMvLimits limits;
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
  av1_set_subpel_mv_search_range(&limits, &x->mv_limits, &ref_mv.as_mv);
  clamp_mv(&mv->as_mv, &limits);
}
1059
// Performs or reuses the NEWMV motion search(es) required by the current mode.
// For compound NEWMV modes, single-reference NEWMV results previously cached
// in |args| are reused (clamped into range) where valid and their MV rate is
// recomputed. For the single-reference case, a motion search is run and the
// result is cached back into |args| for later compound reuse.
// On success cur_mv holds the selected MV(s) and *rate_mv their signaling
// cost. Returns 0 on success, or INT64_MAX if the mode should be skipped
// (invalid search result, or the mode_info entry is flagged to skip).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Map an absent second reference (negative) to 0 for array indexing.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: reuse each cached single-reference result if valid.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Sum the MV signaling cost over both references.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference uses a NEW MV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first reference uses a NEW MV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: bound the search range using the closest previously
    // searched ref MV for this block and its cached NEWMV result.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the search when a previous ref MV is within 16 pels
      // (MVs are in 1/8-pel units; see the >> 3 conversion below).
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result for reuse by later compound-mode evaluations.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1164
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1165 static inline void update_mode_start_end_index(
1166 const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1167 int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1168 int interintra_allowed, int eval_motion_mode) {
1169 *mode_index_start = (int)SIMPLE_TRANSLATION;
1170 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1171 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1172 if (!eval_motion_mode) {
1173 *mode_index_end = (int)SIMPLE_TRANSLATION;
1174 } else {
1175 // Set the start index appropriately to process motion modes other than
1176 // simple translation
1177 *mode_index_start = 1;
1178 }
1179 }
1180 if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1181 *mode_index_end = SIMPLE_TRANSLATION;
1182 }
1183
1184 /*!\brief AV1 motion mode search
1185 *
1186 * \ingroup inter_mode_search
1187 * Function to search over and determine the motion mode. It will update
1188 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1189 * WARPED_CAUSAL and determine any necessary side information for the selected
1190 * motion mode. It will also perform the full transform search, unless the
1191 * input parameter do_tx_search indicates to do an estimation of the RD rather
1192 * than an RD corresponding to a full transform search. It will return the
1193 * RD for the final motion_mode.
1194 * Do the RD search for a given inter mode and compute all information relevant
1195 * to the input mode. It will compute the best MV,
1196 * compound parameters (if the mode is a compound mode) and interpolation filter
1197 * parameters.
1198 *
1199 * \param[in] cpi Top-level encoder structure.
1200 * \param[in] tile_data Pointer to struct holding adaptive
1201 * data/contexts/models for the tile during
1202 * encoding.
1203 * \param[in] x Pointer to struct holding all the data for
1204 * the current macroblock.
1205 * \param[in] bsize Current block size.
1206 * \param[in,out] rd_stats Struct to keep track of the overall RD
1207 * information.
1208 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1209 * for only the Y plane.
1210 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1211 * for only the UV planes.
1212 * \param[in] args HandleInterModeArgs struct holding
1213 * miscellaneous arguments for inter mode
1214 * search. See the documentation for this
1215 * struct for a description of each member.
1216 * \param[in] ref_best_rd Best RD found so far for this block.
1217 * It is used for early termination of this
1218 * search if the RD exceeds this value.
1219 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1220 * best total RD for a skip mode so far, and
1221 * skip_rd[1] is the best RD for a skip mode so
1222 * far in luma. This is used as a speed feature
1223 * to skip the transform search if the computed
1224 * skip RD for the current mode is not better
1225 * than the best skip_rd so far.
1226 * \param[in,out] rate_mv The rate associated with the motion vectors.
1227 * This will be modified if a motion search is
1228 * done in the motion mode search.
1229 * \param[in,out] orig_dst A prediction buffer to hold a computed
1230 * prediction. This will eventually hold the
1231 * final prediction, and the tmp_dst info will
1232 * be copied here.
1233 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1234 * do_tx_search (see below) is 0.
1235 * \param[in] do_tx_search Parameter to indicate whether or not to do
1236 * a full transform search. This will compute
1237 * an estimated RD for the modes without the
1238 * transform search and later perform the full
1239 * transform search on the best candidates.
1240 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1241 * information to perform a full transform
1242 * search only on winning candidates searched
1243 * with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
 *                                  modes other than SIMPLE_TRANSLATION.
1246 * \param[out] yrd Stores the rdcost corresponding to encoding
1247 * the luma plane.
1248 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1249 * current motion mode being tested should be skipped. It returns 0 if the
1250 * motion mode search is a success.
1251 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode rate excluding residual coefficient cost; each motion mode's rate is
  // rebuilt on top of this inside the loop below.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  // When set, interintra candidates are appended after the motion modes in
  // the search loop (see mode_index_end below).
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  // Save the second ref; it may be overwritten by the interintra path and is
  // restored on every early continue and at the end.
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  const int txfm_rd_gate_level =
      get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
                             cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
                             TX_SEARCH_MOTION_MODE, eval_motion_mode);

  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices past last_motion_mode_allowed are interintra candidates.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    // Under parallel-simulation testing, read probabilities from the temp
    // copy instead of the live frame probabilities.
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Re-run the single motion search for this candidate and rebuild the
        // mode rate with the new MV cost.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples, cpi->sf.mv_sf.warp_search_method,
                               cpi->sf.mv_sf.warp_search_iters);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Least-squares fit failed; skip WARPED_CAUSAL.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Bail out when the estimated RD is more than 25% above the best
      // estimate so far (est_rd * 0.80 > best <=> est_rd > 1.25 * best).
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record the candidate for the deferred transform search; compound
      // candidates are dropped when the frame is single-reference only.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        // Feed the observed rate/distortion back into the RD model.
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1639
// Estimate the RD cost of coding this block in skip mode: build the inter
// predictor plane by plane, accumulate the (bit-depth normalized) SSE against
// the source, and charge only the skip-mode flag as rate. Results are written
// into rd_stats; the destination buffers are restored before returning.
// Always returns 0.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t sse_sum = 0;
  int64_t running_rd = INT64_MAX;

  // In skip mode the only rate spent is the skip-mode flag itself.
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];

  for (int p = 0; p < num_planes; ++p) {
    // Build the inter predictor for this single plane.
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, p,
                                  p);
    const struct macroblockd_plane *const pd = &xd->plane[p];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    av1_subtract_plane(x, plane_bsize, p);

    int64_t plane_sse =
        av1_pixel_diff_dist(x, p, 0, 0, plane_bsize, plane_bsize, NULL);
    // Normalize high-bit-depth error back down to an 8-bit scale.
    if (is_cur_buf_hbd(xd))
      plane_sse = ROUND_POWER_OF_TWO(plane_sse, (xd->bd - 8) * 2);
    sse_sum += plane_sse << 4;
    // Once the running cost exceeds the best RD so far, the remaining planes
    // cannot change the outcome, so stop early.
    running_rd = RDCOST(x->rdmult, rd_stats->rate, sse_sum);
    if (running_rd > best_rd) break;
  }

  rd_stats->dist = rd_stats->sse = sse_sum;
  rd_stats->rdcost = running_rd;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1681
1682 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1683 // mode
1684 // Note(rachelbarker): This speed feature currently does not interact correctly
1685 // with global motion. The issue is that, when global motion is used, GLOBALMV
1686 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1687 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
// Returns 1 when `single_mode` for reference `ref_idx` would yield a motion
// vector already covered by another mode (so the caller can skip it), else 0.
static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
                                      int ref_idx,
                                      const MV_REFERENCE_FRAME *ref_frame,
                                      PREDICTION_MODE single_mode) {
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  assert(single_mode != NEWMV);

  switch (single_mode) {
    case NEARESTMV:
      // NEARESTMV is never pruned here.
      return 0;
    case NEARMV:
      // With fewer than two stack entries, NEARMV degenerates to GLOBALMV.
      return ref_mv_count < 2;
    case GLOBALMV: {
      // Empty stack: GLOBALMV duplicates NEARESTMV -> prune GLOBALMV.
      if (ref_mv_count == 0) return 1;
      // Exactly one entry: NEARMV is the duplicate, so keep GLOBALMV.
      if (ref_mv_count == 1) return 0;

      // Otherwise GLOBALMV is redundant only if it matches an mv already in
      // the usable part of the ref mv stack.
      const int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
      for (int idx = 0; idx < stack_size; idx++) {
        const int_mv stack_mv =
            (ref_idx == 0)
                ? mbmi_ext->ref_mv_stack[ref_frame_type][idx].this_mv
                : mbmi_ext->ref_mv_stack[ref_frame_type][idx].comp_mv;
        if (stack_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
          return 1;
      }
      return 0;
    }
    default: return 0;
  }
}
1724
// Fetch the motion vector implied by `this_mode` for reference `ref_idx` into
// *this_mv. Returns 0 when the candidate can be skipped as a duplicate (only
// possible with skip_repeated_ref_mv set), 1 otherwise. For NEWMV the value
// is a placeholder (INVALID_MV) to be filled by the motion search later.
static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                              int ref_idx, int ref_mv_idx,
                              int skip_repeated_ref_mv,
                              const MV_REFERENCE_FRAME *ref_frame,
                              const MB_MODE_INFO_EXT *mbmi_ext) {
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
  assert(is_inter_singleref_mode(single_mode));

  if (single_mode == NEWMV) {
    this_mv->as_int = INVALID_MV;
    return 1;
  }

  if (single_mode == NEARESTMV || single_mode == NEARMV) {
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
    // NEARESTMV reads stack slot 0; NEARMV reads slot ref_mv_idx + 1.
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
      assert(ref_mv_offset >= 0);
      *this_mv =
          (ref_idx == 0)
              ? mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv
              : mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
      return 1;
    }
    // Stack too short: fall through to the global mv fallback below.
  } else {
    assert(single_mode == GLOBALMV);
  }

  // GLOBALMV, or NEAREST/NEAR reading past the end of the stack: both resolve
  // to the frame-level global motion vector (and may be pruned as duplicates).
  if (skip_repeated_ref_mv &&
      check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
    return 0;
  *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  return 1;
}
1761
1762 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1763 // population
// Decide whether NEARESTMV/NEARMV can be pruned based on how many ref mv list
// entries were populated from high-weight ("nearest") candidates. Returns 1
// to prune the mode, 0 to keep it.
static inline int skip_nearest_near_mv_using_refmv_weight(
    const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
    const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
  if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
  // Never prune before the block has found at least one valid inter mode.
  if (!is_inter_mode(best_mode)) return 0;

  const MACROBLOCKD *xd = &x->e_mbd;
  // The heuristic relies on both spatial neighbors being present.
  if (!xd->left_available || !xd->up_available) return 0;

  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const uint16_t *const weights = mbmi_ext->weight[ref_frame_type];
  const int mv_count =
      AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
  if (mv_count == 0) return 0;

  // Keep NEARESTMV whenever the top candidate itself is a nearest one.
  if (this_mode == NEARESTMV && weights[0] >= REF_CAT_LEVEL) return 0;

  // Count entries populated from nearest (high-weight) candidates. A small
  // count relative to mv_count means the block's motion correlates poorly
  // with its spatial neighbors, making NEARESTMV/NEARMV unlikely winners:
  // prune NEARMV when the count is relatively low, and NEARESTMV when no
  // entry at all came from a nearest candidate.
  int high_weight_count = 0;
  for (int idx = 0; idx < mv_count; idx++) {
    high_weight_count += (weights[idx] >= REF_CAT_LEVEL);
  }

  const int prune_thresh = 1 + (mv_count >= 2);
  return high_weight_count < prune_thresh;
}
1803
1804 // This function update the non-new mv for the current prediction mode
// Populate cur_mv[] with the motion vectors implied by this_mode for each
// active reference. Returns 1 on success, 0 when the mode should be skipped
// (duplicate candidate) or a vector fails the clamp/validity check.
static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int num_refs = has_second_ref(mbmi) ? 2 : 1;

  int ret = 1;
  for (int i = 0; i < num_refs; ++i) {
    int_mv fetched_mv;
    fetched_mv.as_int = INVALID_MV;
    ret = get_this_mv(&fetched_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // NEWMV takes its predictor straight from the ref mv stack entry
      // selected by ref_mv_idx.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      if (i == 0) {
        cur_mv[i] = x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                        .this_mv;
      } else {
        cur_mv[i] = x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                        .comp_mv;
      }
    } else {
      // Non-NEWMV vectors must be clamped to the frame and validated.
      ret &= clamp_and_check_mv(cur_mv + i, fetched_mv, cm, x);
    }
  }
  return ret;
}
1833
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1834 static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
1835 const MB_MODE_INFO_EXT *mbmi_ext,
1836 const int (*const drl_mode_cost0)[2],
1837 int8_t ref_frame_type) {
1838 int cost = 0;
1839 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1840 for (int idx = 0; idx < 2; ++idx) {
1841 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1842 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1843 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1844 if (mbmi->ref_mv_idx == idx) return cost;
1845 }
1846 }
1847 return cost;
1848 }
1849
1850 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1851 for (int idx = 1; idx < 3; ++idx) {
1852 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1853 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1854 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1855 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1856 }
1857 }
1858 return cost;
1859 }
1860 return cost;
1861 }
1862
// Returns 0 when any NEWMV component of this (compound) mode failed its
// single-reference motion search, 1 otherwise.
static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
                                        const MB_MODE_INFO *const mbmi,
                                        PREDICTION_MODE this_mode) {
  for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
    if (get_single_mode(this_mode, ref_idx) != NEWMV) continue;
    const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
    if (!args->single_newmv_valid[mbmi->ref_mv_idx][ref]) return 0;
  }
  return 1;
}
1876
// Number of ref_mv_idx values worth searching for this mode: 1 when no DRL
// index is coded, otherwise up to MAX_REF_MV_SEARCH entries from the stack.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int newmv_only = (mode == NEWMV || mode == NEW_NEWMV);

  // A DRL index is coded only when the stack holds spare candidates beyond
  // what the mode implicitly consumes (NEAR-class modes skip slot 0).
  if ((has_nearmv && ref_mv_count > 2) || (newmv_only && ref_mv_count > 1)) {
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
  }
  return 1;
}
1892
1893 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1894 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1895 const int qindex,
1896 const int ref_mv_idx) {
1897 if (reduce_inter_modes >= 3) return 1;
1898 // Q-index logic based pruning is enabled only for
1899 // reduce_inter_modes = 2.
1900 assert(reduce_inter_modes == 2);
1901 // When reduce_inter_modes=2, pruning happens as below based on q index.
1902 // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1903 // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1904 // For q index range between 171 and 255: no pruning.
1905 const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1906 return (ref_mv_idx >= min_prune_ref_mv_idx);
1907 }
1908
1909 // Whether this reference motion vector can be skipped, based on initial
1910 // heuristics.
// Cheap heuristics to decide whether this ref_mv_idx can be skipped without
// running the full mode evaluation. Commits ref_mv_idx into mbmi as a side
// effect (as the original does). Returns true to skip the candidate.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);

  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
    const uint16_t weight =
        mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv];
    // Drop low-weight candidates for the less important LAST2/LAST3 refs.
    const bool uses_last23 = mbmi->ref_frame[0] == LAST2_FRAME ||
                             mbmi->ref_frame[0] == LAST3_FRAME ||
                             mbmi->ref_frame[1] == LAST2_FRAME ||
                             mbmi->ref_frame[1] == LAST3_FRAME;
    if (uses_last23 && weight < REF_CAT_LEVEL) return true;

    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode) &&
        mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
        mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
      // At higher aggressiveness, also prune by q-index band for NEWMV-class
      // modes on non-nearest references.
      if (prune_ref_mv_idx_using_qindex(sf->inter_sf.reduce_inter_modes,
                                        x->qindex, ref_mv_idx) &&
          weight < REF_CAT_LEVEL) {
        return true;
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  if (is_comp_pred && !is_single_newmv_valid(args, mbmi, mbmi->mode)) {
    return true;
  }

  // Bail out if even a coefficient-free coding of this candidate cannot beat
  // the best RD so far; NEAREST modes are always kept as a safety net.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  est_rd_rate += get_drl_cost(mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0,
                              ref_frame_type);
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1963
1964 // Compute the estimated RD cost for the motion vector with simple translation.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the current destination buffers so the predictor below writes
  // into the normal reconstruction buffers.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Force the simplest compound configuration: plain average, no wedge or
  // compound-group signaling.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  // Restrict to simple translation (no OBMC / warped motion) and commit the
  // candidate ref_mv_idx before costing the DRL index.
  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Accumulate the mode-signaling rate: ref frame, single/comp mode and the
  // DRL index for this candidate.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit when the signaling rate alone already exceeds the best RD.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Build only the luma predictor and estimate rate/distortion with the
  // curve-fit model instead of a full transform search.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
2038
2039 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2040 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2041 // it is included.
// Includes `index` in the bit-set stored in *mask by setting that bit.
static inline void mask_set_bit(int *mask, int index) {
  *mask |= (1 << index);
}
2043
// Returns true when `index` is a member of the bit-set `mask`.
static inline bool mask_check_bit(int mask, int index) {
  return ((mask >> index) & 0x1) != 0;
}
2047
2048 // Before performing the full MV search in handle_inter_mode, do a simple
2049 // translation search and see if we can eliminate any motion vectors.
2050 // Returns an integer where, if the i-th bit is set, it means that the i-th
2051 // motion vector should be searched. This is only set for NEAR_MV.
// Before the full MV search, run a cheap simple-translation RD estimate per
// ref_mv_idx and return a bitmask of the indices worth searching fully (bit i
// set => search index i). The estimate-based pruning applies to NEARMV-class
// modes only; otherwise the early-breakout mask is returned as-is.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // With a single candidate, evaluating it outright beats trying to prune it.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // First pass: keep only indices that survive the cheap early breakout.
  int good_indices = 0;
  for (int idx = 0; idx < ref_set; ++idx) {
    if (!ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x,
                                   args, ref_best_rd, idx)) {
      mask_set_bit(&good_indices, idx);
    }
  }

  // The simple-translation pruning below requires the speed feature to be on,
  // a NEARMV-class mode, and a sufficiently large block.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Second pass: estimate the RD cost of each surviving index with a
  // translation-only prediction.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    if (!mask_check_bit(good_indices, ref_mv_idx)) continue;
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Locate the cheapest estimate.
  int best_idx = 0;
  for (int idx = 1; idx < MAX_REF_MV_SEARCH; ++idx) {
    if (idx_rdcost[idx] < idx_rdcost[best_idx]) best_idx = idx;
  }

  // Keep indices whose estimate is within a small multiple of the best and
  // within a looser, experimentally derived multiple of the reference RD.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  const double ref_dth = 5;
  int result = 0;
  for (int idx = 0; idx < ref_set; ++idx) {
    if (mask_check_bit(good_indices, idx) &&
        (1.0 * idx_rdcost[idx]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[idx]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, idx);
    }
  }
  return result;
}
2122
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate of the candidate before motion mode search and transform coding is
   * applied (i.e. excluding coefficient rate).
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate, as computed by RDCOST().
   */
  int64_t rd_cost;
} motion_mode_candidate;
2152
2153 /*!\cond */
typedef struct motion_mode_best_st_candidate {
  // Winning SIMPLE_TRANSLATION candidates retained for a later pass that
  // evaluates other motion modes on them.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2158
2159 // Checks if the current reference frame matches with neighbouring block's
2160 // (top/left) reference frames
// Returns 1 when any reference frame used by cur_mbmi also appears among the
// neighboring block's reference frames, else 0.
static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
                                               MB_MODE_INFO *nb_mbmi) {
  const int num_cur_refs = has_second_ref(cur_mbmi) ? 2 : 1;
  for (int i = 0; i < num_cur_refs; i++) {
    const MV_REFERENCE_FRAME cur_ref = cur_mbmi->ref_frame[i];
    if (cur_ref == nb_mbmi->ref_frame[0] || cur_ref == nb_mbmi->ref_frame[1])
      return 1;
  }
  return 0;
}
2177
// Scans the row of blocks directly above the current block for one that
// shares a reference frame with it. Returns 1 on a match, or when no above
// row is available; 0 otherwise.
static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
                                              MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // Rewind to the start of the previous mi row in the mi array.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  // Advance by each above neighbor's width so every block is visited once.
  for (int above_mi_col = mi_col; above_mi_col < end_col;) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    if (is_inter_block(*above_mi) &&
        ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi)) {
      return 1;
    }
    above_mi_col += mi_size_wide[above_mi[0]->bsize];
  }
  return 0;
}
2199
// Scans the column of blocks directly left of the current block for one that
// shares a reference frame with it. Returns 1 on a match, or when no left
// column is available; 0 otherwise.
static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
                                             MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // Rewind to the top of the previous mi column in the mi array.
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  // Advance by each left neighbor's height so every block is visited once.
  for (int left_mi_row = mi_row; left_mi_row < end_row;) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    if (is_inter_block(*left_mi) &&
        ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi)) {
      return 1;
    }
    left_mi_row += mi_size_high[left_mi[0]->bsize];
  }
  return 0;
}
2221 /*!\endcond */
2222
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best (smallest non-zero) inter cost out of all of the reference
   * frames, or INT64_MAX when none is available.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, accumulated over the TPL units
   * covering the block. A value of 0 indicates an invalid reference.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2236
2237 #if !CONFIG_REALTIME_ONLY
2238 // TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, per reference frame, the TPL prediction error over all TPL
// stats units covering this block, and records the smallest non-zero cost
// among valid references as best_inter_cost. No-op if TPL stats are not
// ready for this frame.
static inline void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // One TPL stats unit spans 'step' mi units in each dimension.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted to the superres (upscaled) mi domain;
  // presumably TPL stats are stored at that resolution — the conversion
  // helpers below encapsulate the mapping.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  // Walk every TPL unit intersecting the block, clamped to frame bounds.
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2292 #endif
2293
// Returns 1 when this mode should be pruned because its TPL inter cost is too
// large relative to the best inter cost across references, else 0.
static inline int prune_modes_based_on_tpl_stats(
    PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
    const PREDICTION_MODE this_mode, int prune_mode_level) {
  // At the lower aggressiveness level, NEWMV-class modes are exempt from
  // this pruning.
  if (prune_mode_level < 2 && have_newmv_in_inter_mode(this_mode)) return 0;

  const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
  if (best_inter_cost == INT64_MAX) return 0;

  const int prune_level = prune_mode_level - 1;

  // Thresholds (in quarters of best_inter_cost): lower values prune more
  // aggressively. Columns 0..2 correspond to ref_mv indices 0..2; column 3
  // covers GLOBALMV/GLOBAL_GLOBALMV.
  static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
    { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
  };
  const int is_globalmv =
      (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
  const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;

  // For compound prediction take the worse (larger) of the two reference
  // costs so pruning is more aggressive.
  int64_t cur_inter_cost =
      inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
  if (refs[1] > INTRA_FRAME) {
    const int64_t cost_ref1 =
        inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
    cur_inter_cost = AOMMAX(cur_inter_cost, cost_ref1);
  }

  // Prune when the current cost exceeds (factor / 4) * best cost.
  const int64_t thresh =
      (tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
       best_inter_cost) >>
      2;
  return cur_inter_cost > thresh;
}
2341
2342 /*!\brief High level function to select parameters for compound mode.
2343 *
2344 * \ingroup inter_mode_search
2345 * The main search functionality is done in the call to av1_compound_type_rd().
2346 *
2347 * \param[in] cpi Top-level encoder structure.
2348 * \param[in] x Pointer to struct holding all the data for
2349 * the current macroblock.
2350 * \param[in] args HandleInterModeArgs struct holding
2351 * miscellaneous arguments for inter mode
2352 * search. See the documentation for this
2353 * struct for a description of each member.
2354 * \param[in] ref_best_rd Best RD found so far for this block.
2355 * It is used for early termination of this
2356 * search if the RD exceeds this value.
2357 * \param[in,out] cur_mv Current motion vector.
2358 * \param[in] bsize Current block size.
2359 * \param[in,out] compmode_interinter_cost RD of the selected interinter
2360 compound mode.
2361 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2362 * allocated buffers for the compound
2363 * predictors and masks in the compound type
2364 * search.
2365 * \param[in,out] orig_dst A prediction buffer to hold a computed
2366 * prediction. This will eventually hold the
2367 * final prediction, and the tmp_dst info will
2368 * be copied here.
2369 * \param[in] tmp_dst A temporary prediction buffer to hold a
2370 * computed prediction.
2371 * \param[in,out] rate_mv The rate associated with the motion vectors.
2372 * This will be modified if a motion search is
2373 * done in the motion mode search.
2374 * \param[in,out] rd_stats Struct to keep track of the overall RD
2375 * information.
2376 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2377 * best total RD for a skip mode so far, and
2378 * skip_rd[1] is the best RD for a skip mode so
2379 * far in luma. This is used as a speed feature
2380 * to skip the transform search if the computed
2381 * skip RD for the current mode is not better
2382 * than the best skip_rd so far.
2383 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2384 * predictor. If this is 0, the inter predictor
2385 * has already been built and thus we can avoid
2386 * repeating computation.
2387 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2388 * a viable candidate.
2389 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Allow every compound type in the search; av1_compound_type_rd() applies
  // its own internal pruning on top of this mask.
  const int mode_search_mask = (1 << COMPOUND_AVERAGE) |
                               (1 << COMPOUND_DISTWTD) |
                               (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  int is_luma_interp_done = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  const int64_t rd_thresh = get_rd_thresh_from_best_rd(
      ref_best_rd, (1 << comp_type_rd_shift), comp_type_rd_scale);

  // Select the compound type and any parameters related to that type (for
  // example, the mask parameters if it is a masked mode) and compute the RD.
  int64_t best_rd_compound;
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);

  // Reject the mode early if its (rescaled) compound RD already exceeds the
  // best RD seen so far.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only the chroma predictors for COMPOUND_AVERAGE. The luma
  // predictor does not need av1_enc_build_inter_predictor() here because
  // COMPOUND_AVERAGE is the first candidate in av1_compound_type_rd, which
  // means it used the dst_buf rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2444
2445 // Speed feature to prune out MVs that are similar to previous MVs if they
2446 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2447 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2448 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2449 MB_MODE_INFO *mbmi, int pruning_factor) {
2450 int i;
2451 const int is_comp_pred = has_second_ref(mbmi);
2452 const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2453
2454 // Skip the evaluation if an MV match is found.
2455 if (ref_mv_idx > 0) {
2456 for (int idx = 0; idx < ref_mv_idx; ++idx) {
2457 if (save_mv[idx][0].as_int == INVALID_MV) continue;
2458
2459 int mv_diff = 0;
2460 for (i = 0; i < 1 + is_comp_pred; ++i) {
2461 mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2462 abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2463 }
2464
2465 // If this mode is not the best one, and current MV is similar to
2466 // previous stored MV, terminate this ref_mv_idx evaluation.
2467 if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2468 }
2469 }
2470
2471 if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2472 for (i = 0; i < is_comp_pred + 1; ++i)
2473 save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2474 }
2475
2476 return 0;
2477 }
2478
2479 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2480 *
2481 * \ingroup inter_mode_search
2482 *
2483 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2484 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2485 * Else returns 0.
2486 *
 * Note that the SSE here comes from single_motion_search. So it is
2488 * interpolated with the filter in motion search, not the actual interpolation
2489 * filter used in encoding.
2490 *
2491 * \param[in] fn_ptr A table of function pointers to compute SSE.
2492 * \param[in] x Pointer to struct holding all the data for
2493 * the current macroblock.
2494 * \param[in] bsize The current block_size.
2495 * \param[in] args The args to handle_inter_mode, used to track
2496 * the best SSE.
2497 * \param[in] prune_zero_mv_with_sse The argument holds speed feature
2498 * prune_zero_mv_with_sse value
2499 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2500 */
static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
                                         const MACROBLOCK *x, BLOCK_SIZE bsize,
                                         const HandleInterModeArgs *args,
                                         int prune_zero_mv_with_sse) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
  const int num_refs = 1 + has_second_ref(mbmi);

  for (int idx = 0; idx < num_refs; idx++) {
    if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
      // The pruning logic only works for IDENTITY type models.
      // Note: In theory similar logic could be applied for TRANSLATION
      // type models, but those are not coded due to a spec bug
      // (see comments in gm_get_motion_vector() in av1/common/mv.h).
      assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
      return 0;
    }

    // Do not prune when no valid best-NEWMV SSE was recorded for this ref.
    assert(mbmi->mv[idx].as_int == 0);
    if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
      return 0;
    }
  }

  // Accumulate, over all references, the SSE of the zero-MV predictor and
  // the best SSE found by the single NEWMV search.
  unsigned int zero_mv_sse_sum = 0;
  unsigned int newmv_sse_sum = 0;
  for (int idx = 0; idx < num_refs; idx++) {
    const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
    const struct macroblockd_plane *pd = xd->plane;
    const struct buf_2d *src_buf = &p->src;
    const struct buf_2d *ref_buf = &pd->pre[idx];
    unsigned int sse;
    fn_ptr[bsize].vf(ref_buf->buf, ref_buf->stride, src_buf->buf,
                     src_buf->stride, &sse);
    zero_mv_sse_sum += sse;
    newmv_sse_sum += args->best_single_sse_in_refs[refs[idx]];
  }

  // Higher speed-feature levels prune more aggressively (no slack factor).
  const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
  if ((double)zero_mv_sse_sum > (mul * (double)newmv_sse_sum)) {
    return 1;
  }

  return 0;
}
2556
2557 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2558 *
2559 * \ingroup inter_mode_search
2560 *
2561 * Does a simple interpolation filter search during winner mode evaluation. This
2562 * is currently only used by realtime mode as \ref
2563 * av1_interpolation_filter_search is not called during realtime encoding.
2564 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
 *
2569 * \param[in] cpi Pointer to the compressor. Used for feature
2570 * flags.
2571 * \param[in,out] x Pointer to macroblock. This is primarily
2572 * used to access the buffers.
2573 * \param[in] mi_row The current row in mi unit (4X4 pixels).
2574 * \param[in] mi_col The current col in mi unit (4X4 pixels).
2575 * \param[in] bsize The current block_size.
2576 * \return Returns true if a predictor is built in xd->dst, false otherwise.
2577 */
static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  // Candidate filter pairs; only two of the three are evaluated for any
  // given resolution (see the pruning inside the loop below).
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // If the interpolation filter does not need to be searched for this block
  // (per av1_is_interp_needed), there is nothing to do here.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  // Snapshot of the current destination buffers; the caller's predictor (if
  // any) lives here, and the best predictor is copied back here at the end.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  // Try each candidate filter: build the luma predictor into dst_bufs[0],
  // model its RD cost, and keep the cheapest by swapping buffers.
  for (int i = 0; i < 3; ++i) {
    // Prune EIGHTTAP_SMOOTH for low resolutions and MULTITAP_SHARP otherwise.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    // Include the rate to signal this filter choice.
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // After the swap, dst_bufs[1] holds the best predictor so far and
      // dst_bufs[0] is free for the next candidate.
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // The best predictor already sits in the original buffer; swap back so
    // xd->plane points at it again.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one, and the best predictor must be copied in from tmp_dst.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the chroma (U/V) predictors with the chosen filter.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2688
2689 /*!\brief AV1 inter mode RD computation
2690 *
2691 * \ingroup inter_mode_search
2692 * Do the RD search for a given inter mode and compute all information relevant
2693 * to the input mode. It will compute the best MV,
2694 * compound parameters (if the mode is a compound mode) and interpolation filter
2695 * parameters.
2696 *
2697 * \param[in] cpi Top-level encoder structure.
2698 * \param[in] tile_data Pointer to struct holding adaptive
2699 * data/contexts/models for the tile during
2700 * encoding.
2701 * \param[in] x Pointer to structure holding all the data
2702 * for the current macroblock.
2703 * \param[in] bsize Current block size.
2704 * \param[in,out] rd_stats Struct to keep track of the overall RD
2705 * information.
2706 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2707 * for only the Y plane.
2708 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2709 * for only the UV planes.
2710 * \param[in] args HandleInterModeArgs struct holding
2711 * miscellaneous arguments for inter mode
2712 * search. See the documentation for this
2713 * struct for a description of each member.
2714 * \param[in] ref_best_rd Best RD found so far for this block.
2715 * It is used for early termination of this
2716 * search if the RD exceeds this value.
2717 * \param[in] tmp_buf Temporary buffer used to hold predictors
2718 * built in this search.
2719 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2720 * allocated buffers for the compound
2721 * predictors and masks in the compound type
2722 * search.
2723 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2724 * do_tx_search (see below) is 0.
2725 * \param[in] do_tx_search Parameter to indicate whether or not to do
2726 * a full transform search. This will compute
2727 * an estimated RD for the modes without the
2728 * transform search and later perform the full
2729 * transform search on the best candidates.
2730 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2731 * information to perform a full transform
2732 * search only on winning candidates searched
2733 * with an estimate for transform coding RD.
2734 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2735 * motion mode information used in a speed
2736 * feature to search motion modes other than
2737 * SIMPLE_TRANSLATION only on winning
2738 * candidates.
2739 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2740 * best total RD for a skip mode so far, and
2741 * skip_rd[1] is the best RD for a skip mode so
2742 * far in luma. This is used as a speed feature
2743 * to skip the transform search if the computed
2744 * skip RD for the current mode is not better
2745 * than the best skip_rd so far.
2746 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2747 * narrow down the search based on data
2748 * collected in the TPL model.
2749 * \param[out] yrd Stores the rdcost corresponding to encoding
2750 * the luma plane.
2751 *
2752 * \return The RD cost for the mode being searched.
2753 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  // TPL-based pruning is only applicable when valid TPL stats exist for the
  // current frame.
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of which ref_mv indices deserve a full evaluation.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate cost common to every ref_mv_idx for this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  // Mark all saved MV slots invalid before the search begins.
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        // Track the best single-reference NEWMV SSE per ref frame; used by
        // later pruning (e.g. prune_zero_mv_with_sse).
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Per-threshold-level, per-block-size scale factors applied to the
          // NEWMV SSE before comparing against the best predictor SSE.
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early termination on rate alone (always keep NEAREST variants).
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      if (args->modelled_rd != NULL && !is_comp_pred) {
        // Cache the modelled RD of single-ref modes; compound modes consult
        // these values below to prune.
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          // Prune the compound mode when its modelled RD is much worse than
          // the better of its two single-ref component modes.
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the running best for later pruning; best_ref_mv_idx feeds
      // prune_ref_mv_idx_search on subsequent iterations.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3104
3105 /*!\brief Search for the best intrabc predictor
3106 *
3107 * \ingroup intra_mode_search
3108 * \callergraph
3109 * This function performs a motion search to find the best intrabc predictor.
3110 *
3111 * \returns Returns the best overall rdcost (including the non-intrabc modes
3112 * search before this function).
3113 */
rd_pick_intrabc_mode_sb(const AV1_COMP * cpi,MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats,BLOCK_SIZE bsize,int64_t best_rd)3114 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3115 PICK_MODE_CONTEXT *ctx,
3116 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3117 int64_t best_rd) {
3118 const AV1_COMMON *const cm = &cpi->common;
3119 if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3120 !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3121 return INT64_MAX;
3122 const int num_planes = av1_num_planes(cm);
3123
3124 MACROBLOCKD *const xd = &x->e_mbd;
3125 const TileInfo *tile = &xd->tile;
3126 MB_MODE_INFO *mbmi = xd->mi[0];
3127 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3128
3129 const int mi_row = xd->mi_row;
3130 const int mi_col = xd->mi_col;
3131 const int w = block_size_wide[bsize];
3132 const int h = block_size_high[bsize];
3133 const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3134 const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3135
3136 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3137 const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3138 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3139 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3140 mbmi_ext->mode_context);
3141 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3142 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3143 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3144 int_mv nearestmv, nearmv;
3145 av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3146 0);
3147
3148 if (nearestmv.as_int == INVALID_MV) {
3149 nearestmv.as_int = 0;
3150 }
3151 if (nearmv.as_int == INVALID_MV) {
3152 nearmv.as_int = 0;
3153 }
3154
3155 int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3156 if (dv_ref.as_int == 0) {
3157 av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3158 }
3159 // Ref DV should not have sub-pel.
3160 assert((dv_ref.as_mv.col & 7) == 0);
3161 assert((dv_ref.as_mv.row & 7) == 0);
3162 mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3163
3164 struct buf_2d yv12_mb[MAX_MB_PLANE];
3165 av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3166 for (int i = 0; i < num_planes; ++i) {
3167 xd->plane[i].pre[0] = yv12_mb[i];
3168 }
3169
3170 enum IntrabcMotionDirection {
3171 IBC_MOTION_ABOVE,
3172 IBC_MOTION_LEFT,
3173 IBC_MOTION_DIRECTIONS
3174 };
3175
3176 MB_MODE_INFO best_mbmi = *mbmi;
3177 RD_STATS best_rdstats = *rd_stats;
3178 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3179 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3180 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3181
3182 FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3183 const SEARCH_METHODS search_method =
3184 av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3185 const search_site_config *lookahead_search_sites =
3186 cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3187 const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3188 av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3189 &dv_ref.as_mv, start_mv,
3190 lookahead_search_sites, search_method,
3191 /*fine_search_interval=*/0);
3192 const IntraBCMVCosts *const dv_costs = x->dv_costs;
3193 av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3194
3195 for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3196 dir < IBC_MOTION_DIRECTIONS; ++dir) {
3197 switch (dir) {
3198 case IBC_MOTION_ABOVE:
3199 fullms_params.mv_limits.col_min =
3200 (tile->mi_col_start - mi_col) * MI_SIZE;
3201 fullms_params.mv_limits.col_max =
3202 (tile->mi_col_end - mi_col) * MI_SIZE - w;
3203 fullms_params.mv_limits.row_min =
3204 (tile->mi_row_start - mi_row) * MI_SIZE;
3205 fullms_params.mv_limits.row_max =
3206 (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3207 break;
3208 case IBC_MOTION_LEFT:
3209 fullms_params.mv_limits.col_min =
3210 (tile->mi_col_start - mi_col) * MI_SIZE;
3211 fullms_params.mv_limits.col_max =
3212 (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3213 // TODO(aconverse@google.com): Minimize the overlap between above and
3214 // left areas.
3215 fullms_params.mv_limits.row_min =
3216 (tile->mi_row_start - mi_row) * MI_SIZE;
3217 int bottom_coded_mi_edge =
3218 AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3219 fullms_params.mv_limits.row_max =
3220 (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3221 break;
3222 default: assert(0);
3223 }
3224 assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3225 assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3226 assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3227 assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3228
3229 av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3230
3231 if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3232 fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3233 continue;
3234 }
3235
3236 const int step_param = cpi->mv_search_params.mv_step_param;
3237 IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3238 int_mv best_mv, best_hash_mv;
3239 FULLPEL_MV_STATS best_mv_stats;
3240
3241 int bestsme =
3242 av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3243 &best_mv.as_fullmv, &best_mv_stats, NULL);
3244 const int hashsme = av1_intrabc_hash_search(
3245 cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3246 if (hashsme < bestsme) {
3247 best_mv = best_hash_mv;
3248 bestsme = hashsme;
3249 }
3250
3251 if (bestsme == INT_MAX) continue;
3252 const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3253 if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3254 get_fullmv_from_mv(&dv)))
3255 continue;
3256 if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3257 cm->seq_params->mib_size_log2))
3258 continue;
3259
3260 // DV should not have sub-pel.
3261 assert((dv.col & 7) == 0);
3262 assert((dv.row & 7) == 0);
3263 memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3264 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3265 mbmi->use_intrabc = 1;
3266 mbmi->mode = DC_PRED;
3267 mbmi->uv_mode = UV_DC_PRED;
3268 mbmi->motion_mode = SIMPLE_TRANSLATION;
3269 mbmi->mv[0].as_mv = dv;
3270 mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3271 mbmi->skip_txfm = 0;
3272 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3273 av1_num_planes(cm) - 1);
3274
3275 // TODO(aconverse@google.com): The full motion field defining discount
3276 // in MV_COST_WEIGHT is too large. Explore other values.
3277 const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3278 dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3279 const int rate_mode = x->mode_costs.intrabc_cost[1];
3280 RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3281 if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3282 &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3283 continue;
3284 rd_stats_yuv.rdcost =
3285 RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3286 if (rd_stats_yuv.rdcost < best_rd) {
3287 best_rd = rd_stats_yuv.rdcost;
3288 best_mbmi = *mbmi;
3289 best_rdstats = rd_stats_yuv;
3290 memcpy(best_blk_skip, txfm_info->blk_skip,
3291 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3292 av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3293 }
3294 }
3295 *mbmi = best_mbmi;
3296 *rd_stats = best_rdstats;
3297 memcpy(txfm_info->blk_skip, best_blk_skip,
3298 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3299 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3300 #if CONFIG_RD_DEBUG
3301 mbmi->rd_stats = *rd_stats;
3302 #endif
3303 return best_rd;
3304 }
3305
// TODO(chiyotsai@google.com): We are using `struct AV1_COMP` and
// `struct macroblock` instead of their typedefs here because Doxygen doesn't
// know about the typedefs yet. So using the typedefs would prevent doxygen
// from finding this function and generating the callgraph. Once documents for
// AV1_COMP and MACROBLOCK are added to doxygen, we can revert back to using
// the typedefs.
/*!\brief Rate-distortion search over intra (and intrabc) modes for one block.
 *
 * Searches the luma intra modes first; if the luma winner beats best_rd, the
 * chroma intra modes are searched as well. Finally intrabc is tried, and the
 * overall winner's RD stats are written into rd_cost and the mode-decision
 * context ctx.
 *
 * \param[in]     cpi     Top-level encoder structure.
 * \param[in,out] x       Pointer to the macroblock being encoded.
 * \param[out]    rd_cost RD stats of the selected mode; rate is INT_MAX when
 *                        no mode beat best_rd.
 * \param[in]     bsize   Current block size.
 * \param[in,out] ctx     Mode-decision context updated with the winner.
 * \param[in]     best_rd Best rdcost found so far; used for early pruning.
 */
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  ctx->rd_stats.skip_txfm = 0;
  // Intra-only search: clear any inter-prediction state on mbmi.
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    rd_cost->rate = INT_MAX;
  }

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // Try intrabc last; it updates rd_cost in place when it wins.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3379
3380 static inline void calc_target_weighted_pred(
3381 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3382 const uint8_t *above, int above_stride, const uint8_t *left,
3383 int left_stride);
3384
// Evaluate the skip_mode syntax element (a NEAREST_NEARESTMV compound-average
// prediction with no residual) and, if its rdcost beats the best intra/inter
// mode found so far, commit it into 'search_state' and 'rd_cost'.
static inline void rd_pick_skip_mode(
    RD_STATS *rd_cost, InterModeSearchState *search_state,
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];

  x->compound_idx = 1;  // COMPOUND_AVERAGE
  RD_STATS skip_mode_rd_stats;
  av1_invalid_rd_stats(&skip_mode_rd_stats);

  // skip_mode requires both frame-level reference indices to be valid.
  if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
      skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
    return;
  }

  const MV_REFERENCE_FRAME ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_0;
  const MV_REFERENCE_FRAME second_ref_frame =
      LAST_FRAME + skip_mode_info->ref_frame_idx_1;
  const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
  const THR_MODES mode_index =
      get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);

  if (mode_index == THR_INVALID) {
    return;
  }

  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    return;
  }

  mbmi->mode = this_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame;
  mbmi->ref_frame[1] = second_ref_frame;
  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // UINT8_MAX marks a ref-mv count that has not been populated yet.
  if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
    MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
    if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
        mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
      return;
    }
    av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                     mbmi_ext->mode_context);
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
  }

  assert(this_mode == NEAREST_NEARESTMV);
  if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
    return;
  }

  // Configure mbmi exactly as a skip_mode block would be decoded.
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = x->compound_idx;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = 0;
  mbmi->skip_mode = mbmi->skip_txfm = 1;
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (int i = 0; i < num_planes; i++) {
    xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
    xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
  }

  BUFFER_SET orig_dst;
  for (int i = 0; i < num_planes; i++) {
    orig_dst.plane[i] = xd->plane[i].dst.buf;
    orig_dst.stride[i] = xd->plane[i].dst.stride;
  }

  // Compare the use of skip_mode with the best intra/inter mode obtained.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t best_intra_inter_mode_cost = INT64_MAX;
  if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
    const ModeCosts *mode_costs = &x->mode_costs;
    best_intra_inter_mode_cost = RDCOST(
        x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
        rd_cost->dist);
    // Account for non-skip mode rate in total rd stats
    rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
    av1_rd_cost_update(x->rdmult, rd_cost);
  }

  // Obtain the rdcost for skip_mode.
  skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
               best_intra_inter_mode_cost);

  // In lossless mode, only accept skip_mode when it is truly lossless.
  if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
      (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
    assert(mode_index != THR_INVALID);
    // mbmi->skip_mode was set to 1 above, so this struct copy carries the
    // flag; the previous separate `best_mbmode.skip_mode = 1` store was a
    // dead write and has been removed.
    search_state->best_mbmode = *mbmi;
    memset(search_state->best_mbmode.inter_tx_size,
           search_state->best_mbmode.tx_size,
           sizeof(search_state->best_mbmode.inter_tx_size));
    set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
                  search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
                  xd);
    search_state->best_mode_index = mode_index;

    // Update rd_cost
    rd_cost->rate = skip_mode_rd_stats.rate;
    rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
    rd_cost->rdcost = skip_mode_rd_stats.rdcost;

    search_state->best_rd = rd_cost->rdcost;
    search_state->best_skip2 = 1;
    search_state->best_mode_skippable = 1;

    x->txfm_search_info.skip_txfm = 1;
  }
}
3513
3514 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3515 static inline MB_MODE_INFO *get_winner_mode_stats(
3516 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3517 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3518 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3519 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3520 int mode_idx) {
3521 MB_MODE_INFO *winner_mbmi;
3522 if (multi_winner_mode_type) {
3523 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3524 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3525 winner_mbmi = &winner_mode_stat->mbmi;
3526
3527 *winner_rd_cost = &winner_mode_stat->rd_cost;
3528 *winner_rate_y = winner_mode_stat->rate_y;
3529 *winner_rate_uv = winner_mode_stat->rate_uv;
3530 *winner_mode_index = winner_mode_stat->mode_index;
3531 } else {
3532 winner_mbmi = best_mbmode;
3533 *winner_rd_cost = best_rd_cost;
3534 *winner_rate_y = best_rate_y;
3535 *winner_rate_uv = best_rate_uv;
3536 *winner_mode_index = *best_mode_index;
3537 }
3538 return winner_mbmi;
3539 }
3540
3541 // speed feature: fast intra/inter transform type search
3542 // Used for speed >= 2
3543 // When this speed feature is on, in rd mode search, only DCT is used.
3544 // After the mode is determined, this function is called, to select
3545 // transform types and get accurate rdcost.
refine_winner_mode_tx(const AV1_COMP * cpi,MACROBLOCK * x,RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,THR_MODES * best_mode_index,MB_MODE_INFO * best_mbmode,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],int best_rate_y,int best_rate_uv,int * best_skip2,int winner_mode_count)3546 static inline void refine_winner_mode_tx(
3547 const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3548 PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3549 MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3550 int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3551 const AV1_COMMON *const cm = &cpi->common;
3552 MACROBLOCKD *const xd = &x->e_mbd;
3553 MB_MODE_INFO *const mbmi = xd->mi[0];
3554 TxfmSearchParams *txfm_params = &x->txfm_search_params;
3555 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3556 int64_t best_rd;
3557 const int num_planes = av1_num_planes(cm);
3558
3559 if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3560 rd_cost->skip_txfm))
3561 return;
3562
3563 // Set params for winner mode evaluation
3564 set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3565
3566 // No best mode identified so far
3567 if (*best_mode_index == THR_INVALID) return;
3568
3569 best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3570 for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3571 RD_STATS *winner_rd_stats = NULL;
3572 int winner_rate_y = 0, winner_rate_uv = 0;
3573 THR_MODES winner_mode_index = 0;
3574
3575 // TODO(any): Combine best mode and multi-winner mode processing paths
3576 // Get winner mode stats for current mode index
3577 MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3578 x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3579 &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3580 cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3581
3582 if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3583 winner_mode_index != THR_INVALID &&
3584 is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3585 rd_cost->skip_txfm)) {
3586 RD_STATS rd_stats = *winner_rd_stats;
3587 int skip_blk = 0;
3588 RD_STATS rd_stats_y, rd_stats_uv;
3589 const int skip_ctx = av1_get_skip_txfm_context(xd);
3590
3591 *mbmi = *winner_mbmi;
3592
3593 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3594
3595 // Select prediction reference frames.
3596 for (int i = 0; i < num_planes; i++) {
3597 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3598 if (has_second_ref(mbmi))
3599 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3600 }
3601
3602 if (is_inter_mode(mbmi->mode)) {
3603 const int mi_row = xd->mi_row;
3604 const int mi_col = xd->mi_col;
3605 bool is_predictor_built = false;
3606 const PREDICTION_MODE prediction_mode = mbmi->mode;
3607 // Do interpolation filter search for realtime mode if applicable.
3608 if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3609 cpi->oxcf.mode == REALTIME &&
3610 cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3611 is_inter_mode(prediction_mode) &&
3612 mbmi->motion_mode == SIMPLE_TRANSLATION &&
3613 !is_inter_compound_mode(prediction_mode)) {
3614 is_predictor_built =
3615 fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3616 }
3617 if (!is_predictor_built) {
3618 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3619 av1_num_planes(cm) - 1);
3620 }
3621 if (mbmi->motion_mode == OBMC_CAUSAL)
3622 av1_build_obmc_inter_predictors_sb(cm, xd);
3623
3624 av1_subtract_plane(x, bsize, 0);
3625 if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3626 !xd->lossless[mbmi->segment_id]) {
3627 av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3628 INT64_MAX);
3629 assert(rd_stats_y.rate != INT_MAX);
3630 } else {
3631 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3632 INT64_MAX);
3633 memset(mbmi->inter_tx_size, mbmi->tx_size,
3634 sizeof(mbmi->inter_tx_size));
3635 for (int i = 0; i < xd->height * xd->width; ++i)
3636 set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3637 }
3638 } else {
3639 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3640 INT64_MAX);
3641 }
3642
3643 if (num_planes > 1) {
3644 av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3645 } else {
3646 av1_init_rd_stats(&rd_stats_uv);
3647 }
3648
3649 const ModeCosts *mode_costs = &x->mode_costs;
3650 if (is_inter_mode(mbmi->mode) &&
3651 RDCOST(x->rdmult,
3652 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3653 rd_stats_uv.rate,
3654 (rd_stats_y.dist + rd_stats_uv.dist)) >
3655 RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3656 (rd_stats_y.sse + rd_stats_uv.sse))) {
3657 skip_blk = 1;
3658 rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3659 rd_stats_uv.rate = 0;
3660 rd_stats_y.dist = rd_stats_y.sse;
3661 rd_stats_uv.dist = rd_stats_uv.sse;
3662 } else {
3663 skip_blk = 0;
3664 rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3665 }
3666 int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3667 winner_rate_y - winner_rate_uv;
3668 int64_t this_rd =
3669 RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3670 if (best_rd > this_rd) {
3671 *best_mbmode = *mbmi;
3672 *best_mode_index = winner_mode_index;
3673 av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3674 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3675 rd_cost->rate = this_rate;
3676 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3677 rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3678 rd_cost->rdcost = this_rd;
3679 best_rd = this_rd;
3680 *best_skip2 = skip_blk;
3681 }
3682 }
3683 }
3684 }
3685
3686 /*!\cond */
3687 typedef struct {
3688 // Mask for each reference frame, specifying which prediction modes to NOT try
3689 // during search.
3690 uint32_t pred_modes[REF_FRAMES];
3691 // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3692 // reference frames (i, j).
3693 // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3694 // (NONE_FRAME).
3695 bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3696 } mode_skip_mask_t;
3697 /*!\endcond */
3698
3699 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3700 static inline void disable_reference(
3701 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3702 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3703 ref_combo[ref][ref2 + 1] = true;
3704 }
3705 }
3706
3707 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3708 static inline void disable_inter_references_except_altref(
3709 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3710 disable_reference(LAST_FRAME, ref_combo);
3711 disable_reference(LAST2_FRAME, ref_combo);
3712 disable_reference(LAST3_FRAME, ref_combo);
3713 disable_reference(GOLDEN_FRAME, ref_combo);
3714 disable_reference(BWDREF_FRAME, ref_combo);
3715 disable_reference(ALTREF2_FRAME, ref_combo);
3716 }
3717
3718 static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
3719 { LAST_FRAME, NONE_FRAME }, { ALTREF_FRAME, NONE_FRAME },
3720 { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, NONE_FRAME },
3721 { INTRA_FRAME, NONE_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME },
3722 { LAST_FRAME, GOLDEN_FRAME }, { LAST_FRAME, INTRA_FRAME },
3723 { LAST_FRAME, BWDREF_FRAME }, { LAST_FRAME, LAST3_FRAME },
3724 { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
3725 { BWDREF_FRAME, NONE_FRAME }, { BWDREF_FRAME, ALTREF_FRAME },
3726 { ALTREF_FRAME, INTRA_FRAME }, { BWDREF_FRAME, INTRA_FRAME },
3727 };
3728
// Reference-frame sets of decreasing size used to limit the mode search.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3730
// Initialize 'mask' for the given reference set: REF_SET_FULL enables
// everything; the reduced/realtime sets first disable all reference
// combinations, then re-enable only the combos listed in the matching table.
static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
    int num_ref_combos;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos =
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
        break;
      default:
        assert(0);
        // Define the pointer as well so no branch leaves it uninitialized
        // (the loop below is empty in this case).
        ref_set_combos = NULL;
        num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}
3768
init_mode_skip_mask(mode_skip_mask_t * mask,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize)3769 static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
3770 const AV1_COMP *cpi, MACROBLOCK *x,
3771 BLOCK_SIZE bsize) {
3772 const AV1_COMMON *const cm = &cpi->common;
3773 const struct segmentation *const seg = &cm->seg;
3774 MACROBLOCKD *const xd = &x->e_mbd;
3775 MB_MODE_INFO *const mbmi = xd->mi[0];
3776 unsigned char segment_id = mbmi->segment_id;
3777 const SPEED_FEATURES *const sf = &cpi->sf;
3778 const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3779 REF_SET ref_set = REF_SET_FULL;
3780
3781 if (sf->rt_sf.use_real_time_ref_set)
3782 ref_set = REF_SET_REALTIME;
3783 else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3784 ref_set = REF_SET_REDUCED;
3785
3786 default_skip_mask(mask, ref_set);
3787
3788 int min_pred_mv_sad = INT_MAX;
3789 MV_REFERENCE_FRAME ref_frame;
3790 if (ref_set == REF_SET_REALTIME) {
3791 // For real-time encoding, we only look at a subset of ref frames. So the
3792 // threshold for pruning should be computed from this subset as well.
3793 const int num_rt_refs =
3794 sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3795 for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3796 const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3797 if (ref != INTRA_FRAME) {
3798 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3799 }
3800 }
3801 } else {
3802 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3803 min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3804 }
3805
3806 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3807 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3808 // Skip checking missing reference in both single and compound reference
3809 // modes.
3810 disable_reference(ref_frame, mask->ref_combo);
3811 } else {
3812 // Skip fixed mv modes for poor references
3813 if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3814 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3815 }
3816 }
3817 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3818 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3819 // Reference not used for the segment.
3820 disable_reference(ref_frame, mask->ref_combo);
3821 }
3822 }
3823 // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3824 // is disabled for this segment. This is to prevent the possibility that we
3825 // end up unable to pick any mode.
3826 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3827 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3828 // unless ARNR filtering is enabled in which case we want
3829 // an unfiltered alternative. We allow near/nearest as well
3830 // because they may result in zero-zero MVs but be cheaper.
3831 if (cpi->rc.is_src_frame_alt_ref &&
3832 (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3833 disable_inter_references_except_altref(mask->ref_combo);
3834
3835 mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3836 const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3837 int_mv near_mv, nearest_mv, global_mv;
3838 get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3839 &x->mbmi_ext);
3840 get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3841 get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3842
3843 if (near_mv.as_int != global_mv.as_int)
3844 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3845 if (nearest_mv.as_int != global_mv.as_int)
3846 mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3847 }
3848 }
3849
3850 if (cpi->rc.is_src_frame_alt_ref) {
3851 if (inter_sf->alt_ref_search_fp &&
3852 (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3853 mask->pred_modes[ALTREF_FRAME] = 0;
3854 disable_inter_references_except_altref(mask->ref_combo);
3855 disable_reference(INTRA_FRAME, mask->ref_combo);
3856 }
3857 }
3858
3859 if (inter_sf->alt_ref_search_fp) {
3860 if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3861 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
3862 // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3863 // those are past frames
3864 MV_REFERENCE_FRAME start_frame =
3865 inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3866 for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3867 if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3868 0) {
3869 // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3870 // to the relative dist of LAST_FRAME.
3871 if (inter_sf->alt_ref_search_fp == 1 &&
3872 (abs(cpi->ref_frame_dist_info
3873 .ref_relative_dist[ref_frame - LAST_FRAME]) >
3874 1.5 * abs(cpi->ref_frame_dist_info
3875 .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3876 continue;
3877 }
3878 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3879 mask->pred_modes[ref_frame] |= INTER_ALL;
3880 }
3881 }
3882 }
3883 }
3884
3885 if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3886 if (x->best_pred_mv_sad[0] < INT_MAX) {
3887 int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
3888 const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3889
3890 // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3891 for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3892 ref_frame = prune_ref_list[ref_idx];
3893 if (x->pred_mv_sad[ref_frame] > sad_thresh)
3894 mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3895 }
3896 }
3897 }
3898
3899 if (bsize > sf->part_sf.max_intra_bsize) {
3900 disable_reference(INTRA_FRAME, mask->ref_combo);
3901 }
3902
3903 if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3904 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3905 mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3906 mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3907 }
3908 }
3909
3910 mask->pred_modes[INTRA_FRAME] |=
3911 ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3912
3913 // Prune reference frames which are not the closest to the current
3914 // frame and with large pred_mv_sad.
3915 if (inter_sf->prune_single_ref) {
3916 assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3917 const double prune_threshes[2] = { 1.20, 1.05 };
3918
3919 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3920 const RefFrameDistanceInfo *const ref_frame_dist_info =
3921 &cpi->ref_frame_dist_info;
3922 const int is_closest_ref =
3923 (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3924 (ref_frame == ref_frame_dist_info->nearest_future_ref);
3925
3926 if (!is_closest_ref) {
3927 const int dir =
3928 (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3929 ? 0
3930 : 1;
3931 if (x->best_pred_mv_sad[dir] < INT_MAX &&
3932 x->pred_mv_sad[ref_frame] >
3933 prune_threshes[inter_sf->prune_single_ref - 1] *
3934 x->best_pred_mv_sad[dir])
3935 mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3936 }
3937 }
3938 }
3939 }
3940
// Points the three above/left OBMC prediction buffer slots at equally spaced
// offsets into the shared obmc_buffer storage. For high bit-depth, offsets
// are in uint16_t units and the pointers are wrapped with CONVERT_TO_BYTEPTR.
static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
                                          HandleInterModeArgs *const args,
                                          int is_hbd) {
  // Bytes per sample: 2 for high bit-depth buffers, 1 otherwise.
  const int elem_size = is_hbd ? (int)sizeof(uint16_t) : 1;
  for (int i = 0; i < 3; ++i) {
    // Slots are spaced half a superblock's worth of samples apart:
    // 0, (MAX_SB_SQUARE >> 1) * elem_size, MAX_SB_SQUARE * elem_size.
    const int offset = i * (MAX_SB_SQUARE >> 1) * elem_size;
    uint8_t *const above = obmc_buffer->above_pred + offset;
    uint8_t *const left = obmc_buffer->left_pred + offset;
    args->above_pred_buf[i] = is_hbd ? CONVERT_TO_BYTEPTR(above) : above;
    args->left_pred_buf[i] = is_hbd ? CONVERT_TO_BYTEPTR(left) : left;
  }
}
3965
// Returns 1 when the given (possibly compound) reference type should be
// pruned from the search, 0 otherwise.
static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
                                  MV_REFERENCE_FRAME ref_frame) {
  // Bit set in the encoder-level mask => this reference is pruned outright.
  if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;

  // Expand the reference type into its constituent frame pair and apply
  // selective reference-frame pruning.
  MV_REFERENCE_FRAME rf[2];
  av1_set_ref_frame(rf, ref_frame);
  const AV1_COMMON *const cm = &cpi->common;
  return prune_ref_by_selective_ref_frame(
             cpi, x, rf, cm->cur_frame->ref_display_order_hint) != 0;
}
3981
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3982 static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
3983 int skip_ref_frame_mask) {
3984 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3985 if (!(skip_ref_frame_mask & (1 << r))) {
3986 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3987 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3988 return 1;
3989 }
3990 }
3991 }
3992 return 0;
3993 }
3994
// Returns 1 if the cached mode info (when present) references ref_frame,
// either as one of its single references or as its compound reference type.
static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
                                             const MB_MODE_INFO *mi_cache) {
  // No cache => nothing can match.
  if (mi_cache == NULL) return 0;

  // Single reference: compare against either cached reference slot.
  if (ref_frame < REF_FRAMES) {
    return ref_frame == mi_cache->ref_frame[0] ||
           ref_frame == mi_cache->ref_frame[1];
  }

  // Compound reference: compare against the cached compound type.
  return av1_ref_frame_type(mi_cache->ref_frame) == ref_frame;
}
4010
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// Gathers per-block inputs for the inter-mode RD search: reference signaling
// costs, per-reference MV candidate lists and pred-MV SADs, OBMC neighbor
// predictions (when enabled), and the mode skip mask.
static inline void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  // Set up OBMC prediction buffers and estimate reference signaling costs.
  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0]/[1] track the best SAD over past/future references.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  // Single references: reset per-reference state and populate MV candidates
  // for each enabled reference that is not pruned away.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  // Compound references: populate MV candidate lists where both constituent
  // references are available and the pair is not pruned.
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  // Decide whether OBMC search should be pruned based on frame-level OBMC
  // probabilities; prune_obmc is assigned on every path below.
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // Under FPMT parallel-simulation testing, read the temporary frame
  // probabilities instead of the actual ones.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Build the above/left neighbor predictions used by OBMC and compute
      // the target weighted prediction for this block.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
4146
// Resets all per-direction/per-mode single-reference bookkeeping used to
// prune compound mode search from single-reference results.
static inline void init_single_inter_mode_search_state(
    InterModeSearchState *search_state) {
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref = 0; ref < FWD_REFS; ++ref) {
        SingleInterModeState *const simple =
            &search_state->single_state[dir][mode][ref];
        SingleInterModeState *const modelled =
            &search_state->single_state_modelled[dir][mode][ref];
        simple->ref_frame = NONE_FRAME;
        simple->rd = INT64_MAX;
        modelled->ref_frame = NONE_FRAME;
        modelled->rd = INT64_MAX;
        search_state->single_rd_order[dir][mode][ref] = NONE_FRAME;
      }
    }
  }

  // No best single-reference result recorded for any reference yet.
  for (int ref = 0; ref < REF_FRAMES; ++ref) {
    search_state->best_single_rd[ref] = INT64_MAX;
    search_state->best_single_mode[ref] = PRED_MODE_INVALID;
  }
  av1_zero(search_state->single_state_cnt);
  av1_zero(search_state->single_state_modelled_cnt);
}
4174
// Initializes the full inter-mode search state: best-so-far trackers, mode
// RD thresholds, and modelled/simple RD tables. Compound-specific state is
// only initialized when the frame allows compound prediction.
static inline void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  // Best-so-far trackers for the overall mode search.
  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;
  av1_zero(search_state->best_mbmode);
  search_state->best_rate_y = INT_MAX;
  search_state->best_rate_uv = INT_MAX;
  search_state->best_mode_skippable = 0;
  search_state->best_skip2 = 0;
  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  // Reference-distance ordering info starts out unpopulated.
  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // Mode thresholds: zero up to LAST_NEW_MV_INDEX, then frequency-scaled RD
  // thresholds for the remaining single-reference modes.
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;
  search_state->best_pred_sse = UINT_MAX;

  // Single-reference NEWMV caches and RD tables.
  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int mode = SINGLE_INTER_MODE_START; mode < SINGLE_INTER_MODE_END;
       ++mode) {
    for (int mv_idx = 0; mv_idx < MAX_REF_MV_SEARCH; ++mv_idx) {
      for (int ref = 0; ref < REF_FRAMES; ++ref) {
        search_state->modelled_rd[mode][mv_idx][ref] = INT64_MAX;
        search_state->simple_rd[mode][mv_idx][ref] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i)
    search_state->best_pred_rd[i] = INT64_MAX;

  // Compound-prediction state is only needed when the frame is not limited
  // to single reference.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int mode = COMP_INTER_MODE_START; mode < COMP_INTER_MODE_END;
         ++mode) {
      for (int mv_idx = 0; mv_idx < MAX_REF_MV_SEARCH; ++mv_idx) {
        for (int ref = 0; ref < REF_FRAMES; ++ref) {
          search_state->modelled_rd[mode][mv_idx][ref] = INT64_MAX;
          search_state->simple_rd[mode][mv_idx][ref] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
4252
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4253 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4254 const MV_REFERENCE_FRAME *ref_frame,
4255 const PREDICTION_MODE this_mode) {
4256 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4257 return true;
4258 }
4259
4260 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4261 }
4262
// Returns 1 when (curr_mode, ref_frames) is incompatible with the current
// frame/block configuration and must be skipped, 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  // Compound prediction: every precondition below must hold.
  if (ref_frames[1] > INTRA_FRAME) {
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;

    const AV1_COMMON *const cm = &cpi->common;
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const struct segmentation *const seg = &cm->seg;
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the mode must allow it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    if (!is_interintra_allowed_bsize(bsize)) return 1;
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
  }

  return 0;
}
4295
// ORs together the picked-reference-frame masks of every mi unit covered by
// this block within its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_size_mask = mib_size - 1;
  // Block position inside its superblock, in mi units.
  const int row_start = xd->mi_row & sb_size_mask;
  const int col_start = xd->mi_col & sb_size_mask;
  const int row_end = row_start + mi_size_high[bsize];
  const int col_end = col_start + mi_size_wide[bsize];
  int mask = 0;
  // The per-mi mask grid has a fixed stride of 32 mi units.
  for (int r = row_start; r < row_end; ++r) {
    for (int c = col_start; c < col_end; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4314
4315 // Check if reference frame pair of the current block matches with the given
4316 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4317 static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4318 const MV_REFERENCE_FRAME *ref_frames) {
4319 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4320 (ref_frames[1] == mbmi->ref_frame[1]));
4321 }
4322
// Decides, independent of the search order, whether the (mode, ref_frame)
// candidate should be skipped.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // The mode/reference combination is masked off entirely.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 => skip fully; 2 => skip motion-mode search only.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Thresholds indexed by [prune level - 1][qindex sub-range].
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      // Count how many of the left/above neighbors use this exact ref pair.
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4477
// Resets the block's mode info to a clean simple-translation state for the
// given prediction mode and reference pair.
static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
                             const MV_REFERENCE_FRAME *ref_frames,
                             const AV1_COMMON *cm) {
  mbmi->mode = curr_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frames[0];
  mbmi->ref_frame[1] = ref_frames[1];
  mbmi->ref_mv_idx = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->mv[1].as_int = 0;
  // No palette, no filter intra, no inter-intra, no OBMC/warp.
  PALETTE_MODE_INFO *const palette = &mbmi->palette_mode_info;
  palette->palette_size[0] = 0;
  palette->palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
  set_default_interp_filters(mbmi, cm->features.interp_filter);
}
4495
// Records the best simple and modelled RD of a searched single-reference
// mode into sorted per-direction state lists; these lists later drive
// pruning of compound mode search.
static inline void collect_single_states(MACROBLOCK *x,
                                         InterModeSearchState *search_state,
                                         const MB_MODE_INFO *const mbmi) {
  int i, j;
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  // References up to GOLDEN_FRAME are grouped under dir 0, later references
  // under dir 1.
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Simple rd: best over all searched ref-MV candidates.
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < simple_rd) simple_rd = rd;
  }

  // Insertion sort of single_state (ascending rd).
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
  i = search_state->single_state_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
    state_s[j] = state_s[j - 1];
  state_s[j] = this_state_s;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Modelled rd: best over all searched ref-MV candidates.
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < modelled_rd) modelled_rd = rd;
  }

  // Insertion sort of single_state_modelled (ascending rd).
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
  SingleInterModeState *state_m =
      search_state->single_state_modelled[dir][mode_offset];
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
    state_m[j] = state_m[j - 1];
  state_m[j] = this_state_m;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}
4541
// Post-processes the single-reference states collected during the search:
// invalidates references whose RD is far worse than the best NEWMV/GLOBALMV
// RD, then builds single_rd_order per direction/mode — references ordered by
// simple RD first, supplemented from the modelled-RD ordering. The order is
// later used to select candidates for compound mode search.
static inline void analyze_single_states(const AV1_COMP *cpi,
                                         InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune levels apply a larger (stricter) pruning factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        // Invalidate entries whose (rd / 8) * prune_factor exceeds best_rd.
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-RD states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First pass: take valid references in simple-RD order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Second pass: append modelled-RD references that are not already
      // listed and were not invalidated in the simple-RD list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4630
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4631 static int compound_skip_get_candidates(
4632 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4633 const int dir, const PREDICTION_MODE mode) {
4634 const int mode_offset = INTER_OFFSET(mode);
4635 const SingleInterModeState *state =
4636 search_state->single_state[dir][mode_offset];
4637 const SingleInterModeState *state_modelled =
4638 search_state->single_state_modelled[dir][mode_offset];
4639
4640 int max_candidates = 0;
4641 for (int i = 0; i < FWD_REFS; ++i) {
4642 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4643 max_candidates++;
4644 }
4645
4646 int candidates = max_candidates;
4647 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4648 candidates = AOMMIN(2, max_candidates);
4649 }
4650 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4651 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4652 state[0].ref_frame == state_modelled[0].ref_frame)
4653 candidates = 1;
4654 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4655 }
4656
4657 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4658 // Limit the number of candidates to 1 in each direction for compound
4659 // prediction
4660 candidates = AOMMIN(1, candidates);
4661 }
4662 return candidates;
4663 }
4664
// Decide whether a compound mode (this_mode over {ref_frame,
// second_ref_frame}) can be skipped based on statistics gathered while
// searching the corresponding single-reference modes.
// Returns 1 if the compound mode should be pruned, 0 otherwise.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Single-reference mode corresponding to each half of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each reference: 0 for refs up to GOLDEN, 1 otherwise.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Check whether each reference was actually evaluated during the
  // single-reference search; without an entry no stats exist for it.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEAREST/NEAR halves, require the MV used by the single mode to match
  // the MV the compound mode would use, for every ref MV candidate; if they
  // differ, the single-mode stats do not predict the compound result.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune if either reference (with usable stats) is not among the top
  // single-reference candidates for its direction and mode.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4729
4730 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4731 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
4732 const MV_REFERENCE_FRAME *ref_frames,
4733 int *const is_ref_match) {
4734 if (is_inter_block(mbmi)) {
4735 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4736 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4737 if (has_second_ref(mbmi)) {
4738 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4739 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4740 }
4741 }
4742 }
4743
4744 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4745 static inline int compound_skip_using_neighbor_refs(
4746 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4747 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4748 // Exclude non-extended compound modes from pruning
4749 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4750 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4751 return 0;
4752
4753 if (prune_ext_comp_using_neighbors >= 3) return 1;
4754
4755 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4756 // 1 - match for backward refs
4757 // Check if ref frames of this block matches with left neighbor.
4758 if (xd->left_available)
4759 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4760
4761 // Check if ref frames of this block matches with above neighbor.
4762 if (xd->up_available)
4763 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4764
4765 // Combine ref frame match with neighbors in forward and backward refs.
4766 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4767
4768 // Pruning based on ref frame match with neighbors.
4769 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4770 return 1;
4771 }
4772
4773 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4774 static inline void update_best_single_mode(InterModeSearchState *search_state,
4775 const PREDICTION_MODE this_mode,
4776 const MV_REFERENCE_FRAME ref_frame,
4777 int64_t this_rd) {
4778 if (this_rd < search_state->best_single_rd[ref_frame]) {
4779 search_state->best_single_rd[ref_frame] = this_rd;
4780 search_state->best_single_mode[ref_frame] = this_mode;
4781 }
4782 }
4783
4784 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4785 static inline int skip_compound_using_best_single_mode_ref(
4786 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4787 const PREDICTION_MODE *best_single_mode,
4788 int prune_comp_using_best_single_mode_ref) {
4789 // Exclude non-extended compound modes from pruning
4790 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4791 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4792 return 0;
4793
4794 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4795 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4796 // Get ref frame direction corresponding to NEWMV
4797 // 0 - NEWMV corresponding to forward direction
4798 // 1 - NEWMV corresponding to backward direction
4799 const int newmv_dir = comp_mode_ref0 != NEWMV;
4800
4801 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4802 // have NEWMV as single mode winner.
4803 // Example: For an extended-compound mode,
4804 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4805 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4806 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4807 // ALTREF_FRAME is NEWMV
4808 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4809 if (single_mode == NEWMV) return 0;
4810
4811 // Avoid pruning the compound mode when best single mode is not available
4812 if (prune_comp_using_best_single_mode_ref == 1)
4813 if (single_mode == MB_MODE_COUNT) return 0;
4814 return 1;
4815 }
4816
// qsort comparator for int64_t values, ascending order.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Branch-free three-way comparison: -1, 0, or 1.
  return (lhs > rhs) - (lhs < rhs);
}
4828
// Copy the newly found best mode (and its RD stats) into the search state
// and the pick-mode context. When txfm_search_done is 0, the per-plane rates
// come from an RD estimate rather than an actual transform search, so
// best_rate_y / best_rate_uv are deliberately left untouched.
static inline void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm is forced off when the winning mode is intra.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Save per-block transform-skip decisions and transform types for the
  // winning mode into the pick-mode context.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4863
4864 // Find the best RD for a reference frame (among single reference modes)
4865 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4866 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4867 assert(ref_frame_rd[0] == INT64_MAX);
4868 int64_t ref_copy[REF_FRAMES - 1];
4869 memcpy(ref_copy, ref_frame_rd + 1,
4870 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4871 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4872
4873 int64_t cutoff = ref_copy[0];
4874 // The cut-off is within 10% of the best.
4875 if (cutoff != INT64_MAX) {
4876 assert(cutoff < INT64_MAX / 200);
4877 cutoff = (110 * cutoff) / 100;
4878 }
4879 ref_frame_rd[0] = cutoff;
4880 }
4881
4882 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4883 static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4884 MV_REFERENCE_FRAME frame1,
4885 MV_REFERENCE_FRAME frame2) {
4886 assert(frame2 > 0);
4887 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4888 ref_frame_rd[frame2] <= ref_frame_rd[0];
4889 }
4890
// For each stored winner candidate (searched so far with simple translation
// only), re-run the motion mode RD search and update the overall best mode
// if a candidate improves on it. Compound candidates are skipped.
static inline void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rate bookkeeping.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the current destination buffers for motion_mode_rd.
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    // Set up the prediction buffers for the candidate's reference frame(s).
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // INT64_MAX signals that the candidate was not viable.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4968
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when motion mode search should be skipped for the mode.
  int *skip_motion_mode;
  // Masks of modes/ref frames excluded from the search.
  mode_skip_mask_t *mode_skip_mask;
  // Overall inter mode search state (best mode/RD found so far, etc.).
  InterModeSearchState *search_state;
  // Mask of reference frames whose search can be skipped.
  int skip_ref_frame_mask;
  // Set once the first compound mode in the mode order has been reached.
  int reach_first_comp_mode;
  // Multiplier applied to the mode threshold when the best mode is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far.
  int num_single_modes_processed;
  // Set when single-reference RD stats are ready for compound pruning.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4982
// Decide whether the inter mode at position midx in the default mode order
// should be skipped, based on frame-level compatibility checks and the
// active speed features. Returns 1 to skip the mode, 0 to evaluate it.
// Side effect: *args->skip_motion_mode is updated.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are not handled by this function.
  if (ref_frame == INTRA_FRAME) return 1;

  // Optionally skip compound modes on ARF update frames.
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2: keep the mode but skip its motion mode search.
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune extended compound modes when neighbor blocks do not use the same
  // reference frames.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune extended compound modes using the best single mode of the
  // reference that carries the NEWMV half.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  // Prune NEWMV/NEARMV on GOLDEN when the golden frame is stale and has not
  // been the winning reference so far.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5114
// Track the best RD seen for single-reference, compound-reference, and
// mode-select prediction types, accounting for the compound-mode signaling
// cost where appropriate.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  int64_t single_rate, hybrid_rate;

  if (reference_mode == REFERENCE_MODE_SELECT) {
    // rd_stats->rate already includes the compound-mode signaling cost.
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  const REFERENCE_MODE pred_type =
      comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
5142
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort the candidate list by estimated RD.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the number of candidates per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // The list is sorted, so stop once the estimate is more than 25% worse
    // than the best estimate (curr > 1.25 * top).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond num_mode_thresh evaluations, allow at most one transform search
    // per prediction mode (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): is_predictor_built is never set to true in this function,
    // so the build step below always runs — confirm whether the flag is
    // vestigial.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma RD including the skip-txfm signaling cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      // Feed the observed rate/distortion back into the RD model.
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5329
// Indicates number of winner simple translation modes to be used per
// configuration level; the candidate list itself is maintained by
// handle_winner_cand().
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5332
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by increasing rd_cost, capped at
// max_winner_motion_mode_cand entries.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse candidates one slot down to make room; the last entry is
    // dropped when the list is already full.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    // Fill in the candidate's payload and store it at the insertion point.
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5375
5376 /*!\brief Search intra modes in interframes
5377 *
5378 * \ingroup intra_mode_search
5379 *
5380 * This function searches for the best intra mode when the current frame is an
5381 * interframe. This function however does *not* handle luma palette mode.
5382 * Palette mode is currently handled by \ref av1_search_palette_mode.
5383 *
5384 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
5386 * for the best chroma mode. Because palette mode is currently not handled by
5387 * here, a cache of uv mode is stored in
5388 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5389 * av1_search_palette_mode.
5390 *
5391 * \param[in,out] search_state Struct keep track of the prediction mode
5392 * search state in interframe.
5393 *
5394 * \param[in] cpi Top-level encoder structure.
5395 * \param[in,out] x Pointer to struct holding all the data for
5396 * the current prediction block.
5397 * \param[out] rd_cost Stores the best rd_cost among all the
5398 * prediction modes searched.
5399 * \param[in] bsize Current block size.
 * \param[in,out]    ctx                  Structure holding the number of 4x4
 *                                        blks used to copy the tx_type and
 *                                        txfm_skip arrays for the Y plane only.
5403 * \param[in] sf_args Stores the list of intra mode candidates
5404 * to be searched.
5405 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5406 * current ref frame is an intra frame.
5407 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5408 * terminate chroma intra mode search.
5409 *
5410 * \remark If a new best mode is found, search_state and rd_costs are updated
5411 * correspondingly. While x is also modified, it is only used as a temporary
5412 * buffer, and the final decisions are stored in search_state.
5413 */
static inline void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Best luma intra mode found so far, with the stats and per-block buffers
  // needed to restore it after the luma loop.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Stop as soon as the pruning logic has decided intra is not worth
    // evaluating for this block.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip modes disabled by encoder configuration or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Non-zero angle deltas are only searched when angle deltas are allowed
    // for this block size and enabled in the configuration.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only luma results that beat the inter-derived threshold qualify;
    // otherwise the chroma search below is skipped entirely.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode qualified; leave search_state untouched.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // The intra RD also tightens the per-reference-mode best-rd bounds.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5594
5595 #if !CONFIG_REALTIME_ONLY
5596 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5597 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5598 static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5599 MACROBLOCK *x, BLOCK_SIZE bsize,
5600 int mi_row, int mi_col,
5601 int64_t *inter_cost,
5602 int64_t *intra_cost) {
5603 const AV1_COMMON *const cm = &cpi->common;
5604 // Only consider full SB.
5605 const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5606 const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5607 const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5608 (block_size_high[sb_size] / tpl_bsize_1d);
5609 SuperBlockEnc *sb_enc = &x->sb_enc;
5610 if (sb_enc->tpl_data_count == len) {
5611 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5612 const int tpl_stride = sb_enc->tpl_stride;
5613 const int tplw = mi_size_wide[tpl_bsize];
5614 const int tplh = mi_size_high[tpl_bsize];
5615 const int nw = mi_size_wide[bsize] / tplw;
5616 const int nh = mi_size_high[bsize] / tplh;
5617 if (nw >= 1 && nh >= 1) {
5618 const int of_h = mi_row % mi_size_high[sb_size];
5619 const int of_w = mi_col % mi_size_wide[sb_size];
5620 const int start = of_h / tplh * tpl_stride + of_w / tplw;
5621
5622 for (int k = 0; k < nh; k++) {
5623 for (int l = 0; l < nw; l++) {
5624 *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5625 *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5626 }
5627 }
5628 *inter_cost /= nw * nh;
5629 *intra_cost /= nw * nh;
5630 }
5631 }
5632 }
5633 #endif // !CONFIG_REALTIME_ONLY
5634
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search. Sets intra_search_state.skip_intra_modes = 1 when the
// heuristics or the NN model decide intra evaluation can be skipped.
static inline void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Prune intra when the best single-ref inter mode has a small motion vector
  // and the source is not nearly flat.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  // The remaining pruning requires the speed feature enabled and a
  // non-trivial source variance.
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // TPL costs are unavailable (negative sentinel); at aggressive levels
      // skip intra anyway.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Select the model trained for the matching resolution class.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    // Integer ratio of the maximum AC quantizer to the current one.
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    // skip_intra_in_interframe >= 1 here (checked above), so the index below
    // is in range.
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
5703
skip_interp_filter_search(const AV1_COMP * cpi,int is_single_pred)5704 static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
5705 int is_single_pred) {
5706 const MODE encoding_mode = cpi->oxcf.mode;
5707 if (encoding_mode == REALTIME) {
5708 return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5709 (cpi->sf.interp_sf.skip_interp_filter_search ||
5710 cpi->sf.winner_mode_sf.winner_mode_ifs));
5711 } else if (encoding_mode == GOOD) {
5712 // Skip interpolation filter search for single prediction modes.
5713 return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5714 }
5715 return false;
5716 }
5717
// Returns 1 if the block has low temporal variance (used by real-time
// encoding to prune inter modes), 0 otherwise or when the required speed
// features are not all enabled.
static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
                                     BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;

  // The shortcut applies only with variance-based partitioning and both
  // low-temp-var speed features enabled.
  const bool temp_var_allowed =
      sf->part_sf.partition_search_type == VAR_BASED_PARTITION &&
      sf->rt_sf.short_circuit_low_temp_var &&
      sf->rt_sf.prune_inter_modes_using_temp_var;
  if (!temp_var_allowed) return 0;

  const int mi_row = x->e_mbd.mi_row;
  const int mi_col = x->e_mbd.mi_col;

  // Small (64x64) superblocks use the dedicated variant of the check.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    return av1_get_force_skip_low_temp_var_small_sb(
        &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
  }
  return av1_get_force_skip_low_temp_var(&x->part_search_info.variance_low[0],
                                         mi_row, mi_col, bsize);
}
5742
5743 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5744 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5745 struct macroblock *x, struct RD_STATS *rd_cost,
5746 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5747 int64_t best_rd_so_far) {
5748 AV1_COMMON *const cm = &cpi->common;
5749 const FeatureFlags *const features = &cm->features;
5750 const int num_planes = av1_num_planes(cm);
5751 const SPEED_FEATURES *const sf = &cpi->sf;
5752 MACROBLOCKD *const xd = &x->e_mbd;
5753 MB_MODE_INFO *const mbmi = xd->mi[0];
5754 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5755 int i;
5756 const ModeCosts *mode_costs = &x->mode_costs;
5757 const int *comp_inter_cost =
5758 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5759
5760 InterModeSearchState search_state;
5761 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5762 INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5763 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5764 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5765 };
5766 HandleInterModeArgs args = { { NULL },
5767 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5768 { NULL },
5769 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5770 MAX_SB_SIZE >> 1 },
5771 NULL,
5772 NULL,
5773 NULL,
5774 search_state.modelled_rd,
5775 INT_MAX,
5776 INT_MAX,
5777 search_state.simple_rd,
5778 0,
5779 false,
5780 interintra_modes,
5781 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5782 { { 0, 0 } },
5783 { 0 },
5784 0,
5785 0,
5786 -1,
5787 -1,
5788 -1,
5789 { 0 },
5790 { 0 },
5791 UINT_MAX };
5792 // Currently, is_low_temp_var is used in real time encoding.
5793 const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5794
5795 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5796 // Indicates the appropriate number of simple translation winner modes for
5797 // exhaustive motion mode evaluation
5798 const int max_winner_motion_mode_cand =
5799 num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5800 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5801 motion_mode_candidate motion_mode_cand;
5802 motion_mode_best_st_candidate best_motion_mode_cands;
5803 // Initializing the number of motion mode candidates to zero.
5804 best_motion_mode_cands.num_motion_mode_cand = 0;
5805 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5806 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5807
5808 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5809
5810 av1_invalid_rd_stats(rd_cost);
5811
5812 for (i = 0; i < REF_FRAMES; ++i) {
5813 x->warp_sample_info[i].num = -1;
5814 }
5815
5816 // Ref frames that are selected by square partition blocks.
5817 int picked_ref_frames_mask = 0;
5818 if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5819 mbmi->partition != PARTITION_NONE) {
5820 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5821 // partition blocks. prune_ref_frame_for_rect_partitions >=2
5822 // implies prune for vert, horiz and extended partition blocks.
5823 if ((mbmi->partition != PARTITION_VERT &&
5824 mbmi->partition != PARTITION_HORZ) ||
5825 sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5826 picked_ref_frames_mask =
5827 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5828 }
5829 }
5830
5831 #if CONFIG_COLLECT_COMPONENT_TIMING
5832 start_timing(cpi, set_params_rd_pick_inter_mode_time);
5833 #endif
5834 // Skip ref frames that never selected by square blocks.
5835 const int skip_ref_frame_mask =
5836 picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5837 mode_skip_mask_t mode_skip_mask;
5838 unsigned int ref_costs_single[REF_FRAMES];
5839 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5840 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5841 // init params, set frame modes, speed features
5842 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5843 skip_ref_frame_mask, ref_costs_single,
5844 ref_costs_comp, yv12_mb);
5845 #if CONFIG_COLLECT_COMPONENT_TIMING
5846 end_timing(cpi, set_params_rd_pick_inter_mode_time);
5847 #endif
5848
5849 int64_t best_est_rd = INT64_MAX;
5850 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5851 // If do_tx_search is 0, only estimated RD should be computed.
5852 // If do_tx_search is 1, all modes have TX search performed.
5853 const int do_tx_search =
5854 !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5855 (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5856 num_pels_log2_lookup[bsize] > 8));
5857 InterModesInfo *inter_modes_info = x->inter_modes_info;
5858 inter_modes_info->num = 0;
5859
5860 // Temporary buffers used by handle_inter_mode().
5861 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5862
5863 // The best RD found for the reference frame, among single reference modes.
5864 // Note that the 0-th element will contain a cut-off that is later used
5865 // to determine if we should skip a compound mode.
5866 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5867 INT64_MAX, INT64_MAX, INT64_MAX,
5868 INT64_MAX, INT64_MAX };
5869
5870 // Prepared stats used later to check if we could skip intra mode eval.
5871 int64_t inter_cost = -1;
5872 int64_t intra_cost = -1;
5873 // Need to tweak the threshold for hdres speed 0 & 1.
5874 const int mi_row = xd->mi_row;
5875 const int mi_col = xd->mi_col;
5876
5877 // Obtain the relevant tpl stats for pruning inter modes
5878 PruneInfoFromTpl inter_cost_info_from_tpl;
5879 #if !CONFIG_REALTIME_ONLY
5880 if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5881 // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5882 // prune_ref_by_selective_ref_frame()
5883 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
5884 // prune_ref_by_selective_ref_frame()
5885 // Populating valid_refs[idx] = 1 ensures that
5886 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5887 // pruned ref frame.
5888 int valid_refs[INTER_REFS_PER_FRAME];
5889 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5890 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5891 valid_refs[frame - 1] =
5892 x->tpl_keep_ref_frame[frame] ||
5893 !prune_ref_by_selective_ref_frame(
5894 cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5895 }
5896 av1_zero(inter_cost_info_from_tpl);
5897 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5898 &inter_cost_info_from_tpl);
5899 }
5900
5901 const int do_pruning =
5902 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5903 if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5904 cpi->oxcf.algo_cfg.enable_tpl_model)
5905 calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5906 &intra_cost);
5907 #endif // !CONFIG_REALTIME_ONLY
5908
5909 // Initialize best mode stats for winner mode processing.
5910 const int max_winner_mode_count =
5911 winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5912 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5913 x->winner_mode_count = 0;
5914 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5915 NULL, bsize, best_rd_so_far,
5916 sf->winner_mode_sf.multi_winner_mode_type, 0);
5917
5918 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5919 if (sf->inter_sf.prune_inter_modes_if_skippable) {
5920 // Higher multiplication factor values for lower quantizers.
5921 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5922 }
5923
5924 // Initialize arguments for mode loop speed features
5925 InterModeSFArgs sf_args = { &args.skip_motion_mode,
5926 &mode_skip_mask,
5927 &search_state,
5928 skip_ref_frame_mask,
5929 0,
5930 mode_thresh_mul_fact,
5931 0,
5932 0 };
5933 int64_t best_inter_yrd = INT64_MAX;
5934
5935 // This is the main loop of this function. It loops over all possible inter
5936 // modes and calls handle_inter_mode() to compute the RD for each.
5937 // Here midx is just an iterator index that should not be used by itself
5938 // except to keep track of the number of modes searched. It should be used
5939 // with av1_default_mode_order to get the enum that defines the mode, which
5940 // can be used with av1_mode_defs to get the prediction mode and the ref
5941 // frames.
5942 // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5943 // good speedup for real time case. If we decide to use compound mode in real
5944 // time, maybe we can modify av1_default_mode_order table.
5945 THR_MODES mode_start = THR_INTER_MODE_START;
5946 THR_MODES mode_end = THR_INTER_MODE_END;
5947 const CurrentFrame *const current_frame = &cm->current_frame;
5948 if (current_frame->reference_mode == SINGLE_REFERENCE) {
5949 mode_start = SINGLE_REF_MODE_START;
5950 mode_end = SINGLE_REF_MODE_END;
5951 }
5952
5953 for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5954 // Get the actual prediction mode we are trying in this iteration
5955 const THR_MODES mode_enum = av1_default_mode_order[midx];
5956 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5957 const PREDICTION_MODE this_mode = mode_def->mode;
5958 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5959
5960 const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5961 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5962 const int is_single_pred =
5963 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5964 const int comp_pred = second_ref_frame > INTRA_FRAME;
5965
5966 init_mbmi(mbmi, this_mode, ref_frames, cm);
5967
5968 txfm_info->skip_txfm = 0;
5969 sf_args.num_single_modes_processed += is_single_pred;
5970 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5971 #if CONFIG_COLLECT_COMPONENT_TIMING
5972 start_timing(cpi, skip_inter_mode_time);
5973 #endif
5974 // Apply speed features to decide if this inter mode can be skipped
5975 const int is_skip_inter_mode = skip_inter_mode(
5976 cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5977 #if CONFIG_COLLECT_COMPONENT_TIMING
5978 end_timing(cpi, skip_inter_mode_time);
5979 #endif
5980 if (is_skip_inter_mode) continue;
5981
5982 // Select prediction reference frames.
5983 for (i = 0; i < num_planes; i++) {
5984 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5985 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5986 }
5987
5988 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5989 mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5990 mbmi->filter_intra_mode_info.use_filter_intra = 0;
5991 mbmi->ref_mv_idx = 0;
5992
5993 const int64_t ref_best_rd = search_state.best_rd;
5994 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5995 av1_init_rd_stats(&rd_stats);
5996
5997 const int ref_frame_cost = comp_pred
5998 ? ref_costs_comp[ref_frame][second_ref_frame]
5999 : ref_costs_single[ref_frame];
6000 const int compmode_cost =
6001 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6002 const int real_compmode_cost =
6003 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6004 ? compmode_cost
6005 : 0;
6006 // Point to variables that are maintained between loop iterations
6007 args.single_newmv = search_state.single_newmv;
6008 args.single_newmv_rate = search_state.single_newmv_rate;
6009 args.single_newmv_valid = search_state.single_newmv_valid;
6010 args.single_comp_cost = real_compmode_cost;
6011 args.ref_frame_cost = ref_frame_cost;
6012 args.best_pred_sse = search_state.best_pred_sse;
6013 args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6014
6015 int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6016 search_state.best_skip_rd[1] };
6017 int64_t this_yrd = INT64_MAX;
6018 #if CONFIG_COLLECT_COMPONENT_TIMING
6019 start_timing(cpi, handle_inter_mode_time);
6020 #endif
6021 int64_t this_rd = handle_inter_mode(
6022 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6023 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6024 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6025 &this_yrd);
6026 #if CONFIG_COLLECT_COMPONENT_TIMING
6027 end_timing(cpi, handle_inter_mode_time);
6028 #endif
6029 if (current_frame->reference_mode != SINGLE_REFERENCE) {
6030 if (!args.skip_ifs &&
6031 sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6032 is_inter_singleref_mode(this_mode)) {
6033 collect_single_states(x, &search_state, mbmi);
6034 }
6035
6036 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6037 is_inter_singleref_mode(this_mode))
6038 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6039 }
6040
6041 if (this_rd == INT64_MAX) continue;
6042
6043 if (mbmi->skip_txfm) {
6044 rd_stats_y.rate = 0;
6045 rd_stats_uv.rate = 0;
6046 }
6047
6048 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6049 this_rd < ref_frame_rd[ref_frame]) {
6050 ref_frame_rd[ref_frame] = this_rd;
6051 }
6052
6053 // Did this mode help, i.e., is it the new best mode
6054 if (this_rd < search_state.best_rd) {
6055 assert(IMPLIES(comp_pred,
6056 cm->current_frame.reference_mode != SINGLE_REFERENCE));
6057 search_state.best_pred_sse = x->pred_sse[ref_frame];
6058 best_inter_yrd = this_yrd;
6059 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6060 &rd_stats_uv, mode_enum, x, do_tx_search);
6061 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6062 // skip_rd[0] is the best total rd for a skip mode so far.
6063 // skip_rd[1] is the best total rd for a skip mode so far in luma.
6064 // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6065 // When do_tx_search = 0, skip_rd[1] is updated.
6066 search_state.best_skip_rd[1] = skip_rd[1];
6067 }
6068 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6069 // Add this mode to motion mode candidate list for motion mode search
6070 // if using motion_mode_for_winner_cand speed feature
6071 handle_winner_cand(mbmi, &best_motion_mode_cands,
6072 max_winner_motion_mode_cand, this_rd,
6073 &motion_mode_cand, args.skip_motion_mode);
6074 }
6075
6076 /* keep record of best compound/single-only prediction */
6077 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6078 x->rdmult, &search_state, compmode_cost);
6079 }
6080
6081 #if CONFIG_COLLECT_COMPONENT_TIMING
6082 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6083 #endif
6084 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6085 // For the single ref winner candidates, evaluate other motion modes (non
6086 // simple translation).
6087 evaluate_motion_mode_for_winner_candidates(
6088 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6089 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6090 &search_state, &best_inter_yrd);
6091 }
6092 #if CONFIG_COLLECT_COMPONENT_TIMING
6093 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6094 #endif
6095
6096 #if CONFIG_COLLECT_COMPONENT_TIMING
6097 start_timing(cpi, do_tx_search_time);
6098 #endif
6099 if (do_tx_search != 1) {
6100 // A full tx search has not yet been done, do tx search for
6101 // top mode candidates
6102 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6103 yv12_mb, mi_row, mi_col, &search_state,
6104 rd_cost, ctx, &best_inter_yrd);
6105 }
6106 #if CONFIG_COLLECT_COMPONENT_TIMING
6107 end_timing(cpi, do_tx_search_time);
6108 #endif
6109
6110 #if CONFIG_COLLECT_COMPONENT_TIMING
6111 start_timing(cpi, handle_intra_mode_time);
6112 #endif
6113 // Gate intra mode evaluation if best of inter is skip except when source
6114 // variance is extremely low and also based on max intra bsize.
6115 skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6116 intra_cost);
6117
6118 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6119 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6120 &sf_args, intra_ref_frame_cost,
6121 best_inter_yrd);
6122 #if CONFIG_COLLECT_COMPONENT_TIMING
6123 end_timing(cpi, handle_intra_mode_time);
6124 #endif
6125
6126 #if CONFIG_COLLECT_COMPONENT_TIMING
6127 start_timing(cpi, refine_winner_mode_tx_time);
6128 #endif
6129 int winner_mode_count =
6130 sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6131 // In effect only when fast tx search speed features are enabled.
6132 refine_winner_mode_tx(
6133 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6134 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6135 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6136 #if CONFIG_COLLECT_COMPONENT_TIMING
6137 end_timing(cpi, refine_winner_mode_tx_time);
6138 #endif
6139
6140 // Initialize default mode evaluation params
6141 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6142
6143 // Only try palette mode when the best mode so far is an intra mode.
6144 const int try_palette =
6145 cpi->oxcf.tool_cfg.enable_palette &&
6146 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6147 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6148 RD_STATS this_rd_cost;
6149 int this_skippable = 0;
6150 if (try_palette) {
6151 #if CONFIG_COLLECT_COMPONENT_TIMING
6152 start_timing(cpi, av1_search_palette_mode_time);
6153 #endif
6154 this_skippable = av1_search_palette_mode(
6155 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6156 ctx, &this_rd_cost, search_state.best_rd);
6157 #if CONFIG_COLLECT_COMPONENT_TIMING
6158 end_timing(cpi, av1_search_palette_mode_time);
6159 #endif
6160 if (this_rd_cost.rdcost < search_state.best_rd) {
6161 search_state.best_mode_index = THR_DC;
6162 mbmi->mv[0].as_int = 0;
6163 rd_cost->rate = this_rd_cost.rate;
6164 rd_cost->dist = this_rd_cost.dist;
6165 rd_cost->rdcost = this_rd_cost.rdcost;
6166 search_state.best_rd = rd_cost->rdcost;
6167 search_state.best_mbmode = *mbmi;
6168 search_state.best_skip2 = 0;
6169 search_state.best_mode_skippable = this_skippable;
6170 memcpy(ctx->blk_skip, txfm_info->blk_skip,
6171 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6172 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6173 }
6174 }
6175
6176 search_state.best_mbmode.skip_mode = 0;
6177 if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6178 is_comp_ref_allowed(bsize)) {
6179 const struct segmentation *const seg = &cm->seg;
6180 unsigned char segment_id = mbmi->segment_id;
6181 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6182 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6183 }
6184 }
6185
6186 // Make sure that the ref_mv_idx is only nonzero when we're
6187 // using a mode which can support ref_mv_idx
6188 if (search_state.best_mbmode.ref_mv_idx != 0 &&
6189 !(search_state.best_mbmode.mode == NEWMV ||
6190 search_state.best_mbmode.mode == NEW_NEWMV ||
6191 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6192 search_state.best_mbmode.ref_mv_idx = 0;
6193 }
6194
6195 if (search_state.best_mode_index == THR_INVALID ||
6196 search_state.best_rd >= best_rd_so_far) {
6197 rd_cost->rate = INT_MAX;
6198 rd_cost->rdcost = INT64_MAX;
6199 return;
6200 }
6201
6202 const InterpFilter interp_filter = features->interp_filter;
6203 assert((interp_filter == SWITCHABLE) ||
6204 (interp_filter ==
6205 search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6206 !is_inter_block(&search_state.best_mbmode));
6207 assert((interp_filter == SWITCHABLE) ||
6208 (interp_filter ==
6209 search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6210 !is_inter_block(&search_state.best_mbmode));
6211
6212 if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6213 av1_update_rd_thresh_fact(
6214 cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6215 search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6216 }
6217
6218 // macroblock modes
6219 *mbmi = search_state.best_mbmode;
6220 txfm_info->skip_txfm |= search_state.best_skip2;
6221
6222 // Note: this section is needed since the mode may have been forced to
6223 // GLOBALMV by the all-zero mode handling of ref-mv.
6224 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6225 // Correct the interp filters for GLOBALMV
6226 if (is_nontrans_global_motion(xd, xd->mi[0])) {
6227 int_interpfilters filters =
6228 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6229 assert(mbmi->interp_filters.as_int == filters.as_int);
6230 (void)filters;
6231 }
6232 }
6233
6234 txfm_info->skip_txfm |= search_state.best_mode_skippable;
6235
6236 assert(search_state.best_mode_index != THR_INVALID);
6237
6238 #if CONFIG_INTERNAL_STATS
6239 store_coding_context(x, ctx, search_state.best_mode_index,
6240 search_state.best_mode_skippable);
6241 #else
6242 store_coding_context(x, ctx, search_state.best_mode_skippable);
6243 #endif // CONFIG_INTERNAL_STATS
6244
6245 if (mbmi->palette_mode_info.palette_size[1] > 0) {
6246 assert(try_palette);
6247 av1_restore_uv_color_map(cpi, x);
6248 }
6249 }
6250
// RD mode decision for a block whose segment has the SEG_LVL_SKIP feature
// active: the only candidate considered is GLOBALMV (single reference,
// simple translation) with the whole block coded as skip, so distortion is
// zero and the RD cost consists purely of mode/filter/reference signaling
// bits. Fills rd_cost with that cost, or rate = INT_MAX / rdcost = INT64_MAX
// when the cost does not beat best_rd_so_far.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  // Compound prediction is never used on segment-skip blocks.
  const int comp_pred = 0;
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // The block is coded skip (no residual), so distortion is 0 by construction.
  const int64_t distortion2 = 0;
  // NOTE(review): these casts look vestigial — mi_row/mi_col ARE used below in
  // the gm_get_motion_vector() call.
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  // Reset per-reference search stats so stale values from a previous block
  // cannot leak into later decisions.
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the single allowed coding configuration: GLOBALMV / simple
  // translation, no palette, no filter-intra, DC chroma prediction.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the segment-dictated reference if one is active, else LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector comes directly from the frame's global motion model for
  // the chosen reference.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  // Gather warp-model projection samples so downstream signaling state is
  // consistent, even though the mode itself stays SIMPLE_TRANSLATION.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    // Frame-level filter is fixed; nothing to search.
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd)) {
      // With zero distortion, the best switchable filter is simply the one
      // with the cheapest signaling rate.
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): this always charges LAST_FRAME's cost even when the segment
  // feature selected a different reference above — presumably an accepted
  // approximation; confirm before changing.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal "no usable mode" when even the skip mode cannot beat the caller's
  // running best.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    // Update adaptive RD thresholds with GLOBALMV as the winning mode.
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6382
6383 /*!\cond */
// Context handed through the overlappable-neighbour iterators
// (foreach_overlappable_nb_above/left) while building the OBMC
// weighted-source ('wsrc') and mask buffers.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // destination wsrc/mask buffers
  const uint8_t *tmp;             // neighbour prediction (above or left)
  int tmp_stride;                 // stride of 'tmp' in pixels
  int overlap;                    // blending band size in pixels
};
6390 /*!\endcond */
6391
// Iterator callback for the above neighbours: seeds the OBMC weighted-source
// and mask buffers over the top overlap band. For each pixel, wsrc is set to
// (AOM_BLEND_A64_MAX_ALPHA - Mv(row)) * pred and mask to Mv(row), where Mv is
// the 1-D vertical OBMC blending mask and pred is the neighbour's prediction.
static inline void calc_target_weighted_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_row;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int dst_stride = xd->width << MI_SIZE_LOG2;
  const uint8_t *const vmask = av1_get_obmc_mask(ctxt->overlap);
  const int band_width = op_mi_size * MI_SIZE;
  const int col_off = rel_mi_col * MI_SIZE;

  int32_t *wsrc_row = ctxt->obmc_buffer->wsrc + col_off;
  int32_t *mask_row = ctxt->obmc_buffer->mask + col_off;
  const uint8_t *pred_row = ctxt->tmp + col_off;

  if (is_cur_buf_hbd(xd)) {
    const uint16_t *pred16_row = CONVERT_TO_SHORTPTR(pred_row);
    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = vmask[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < band_width; ++col) {
        wsrc_row[col] = m1 * pred16_row[col];
        mask_row[col] = m0;
      }
      wsrc_row += dst_stride;
      mask_row += dst_stride;
      pred16_row += ctxt->tmp_stride;
    }
  } else {
    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = vmask[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < band_width; ++col) {
        wsrc_row[col] = m1 * pred_row[col];
        mask_row[col] = m0;
      }
      wsrc_row += dst_stride;
      mask_row += dst_stride;
      pred_row += ctxt->tmp_stride;
    }
  }
}
6439
// Iterator callback for the left neighbours: folds the left overlap band into
// buffers already seeded (and pre-scaled by AOM_BLEND_A64_MAX_ALPHA) by the
// above pass. Each pixel's accumulated wsrc/mask is rescaled by the
// horizontal mask Mh(col) and the left prediction is blended in with weight
// (AOM_BLEND_A64_MAX_ALPHA - Mh(col)).
static inline void calc_target_weighted_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
  (void)nb_mi;
  (void)num_planes;
  (void)rel_mi_col;
  (void)dir;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

  const int dst_stride = xd->width << MI_SIZE_LOG2;
  const uint8_t *const hmask = av1_get_obmc_mask(ctxt->overlap);
  const int band_height = op_mi_size * MI_SIZE;
  const int row_off = rel_mi_row * MI_SIZE;

  int32_t *wsrc_row = ctxt->obmc_buffer->wsrc + row_off * dst_stride;
  int32_t *mask_row = ctxt->obmc_buffer->mask + row_off * dst_stride;
  const uint8_t *pred_row = ctxt->tmp + row_off * ctxt->tmp_stride;

  if (is_cur_buf_hbd(xd)) {
    const uint16_t *pred16_row = CONVERT_TO_SHORTPTR(pred_row);
    for (int row = 0; row < band_height; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = hmask[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc_row[col] = (wsrc_row[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                        (pred16_row[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask_row[col] = (mask_row[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc_row += dst_stride;
      mask_row += dst_stride;
      pred16_row += ctxt->tmp_stride;
    }
  } else {
    for (int row = 0; row < band_height; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = hmask[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc_row[col] = (wsrc_row[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                        (pred_row[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask_row[col] = (mask_row[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc_row += dst_stride;
      mask_row += dst_stride;
      pred_row += ctxt->tmp_stride;
    }
  }
}
6489
6490 // This function has a structure similar to av1_build_obmc_inter_prediction
6491 //
6492 // The OBMC predictor is computed as:
6493 //
6494 // PObmc(x,y) =
6495 // AOM_BLEND_A64(Mh(x),
6496 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6497 // PLeft(x, y))
6498 //
6499 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6500 // rounding, this can be written as:
6501 //
6502 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6503 // Mh(x) * Mv(y) * P(x,y) +
6504 // Mh(x) * Cv(y) * Pabove(x,y) +
6505 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6506 //
6507 // Where :
6508 //
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6511 //
6512 // This function computes 'wsrc' and 'mask' as:
6513 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      (Mh(x) * Cv(y) * Pabove(x,y) +
//       AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
6518 //
6519 // mask(x, y) = Mh(x) * Mv(y)
6520 //
6521 // These can then be used to efficiently approximate the error for any
6522 // predictor P in the context of the provided neighbouring predictors by
6523 // computing:
6524 //
6525 // error(x, y) =
6526 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6527 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6528 static inline void calc_target_weighted_pred(
6529 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6530 const uint8_t *above, int above_stride, const uint8_t *left,
6531 int left_stride) {
6532 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6533 const int bw = xd->width << MI_SIZE_LOG2;
6534 const int bh = xd->height << MI_SIZE_LOG2;
6535 const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6536 int32_t *mask_buf = obmc_buffer->mask;
6537 int32_t *wsrc_buf = obmc_buffer->wsrc;
6538
6539 const int is_hbd = is_cur_buf_hbd(xd);
6540 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6541
6542 // plane 0 should not be sub-sampled
6543 assert(xd->plane[0].subsampling_x == 0);
6544 assert(xd->plane[0].subsampling_y == 0);
6545
6546 av1_zero_array(wsrc_buf, bw * bh);
6547 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6548
6549 // handle above row
6550 if (xd->up_available) {
6551 const int overlap =
6552 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6553 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6554 above_stride, overlap };
6555 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6556 max_neighbor_obmc[mi_size_wide_log2[bsize]],
6557 calc_target_weighted_pred_above, &ctxt);
6558 }
6559
6560 for (int i = 0; i < bw * bh; ++i) {
6561 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6562 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563 }
6564
6565 // handle left column
6566 if (xd->left_available) {
6567 const int overlap =
6568 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6569 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6570 left_stride, overlap };
6571 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6572 max_neighbor_obmc[mi_size_high_log2[bsize]],
6573 calc_target_weighted_pred_left, &ctxt);
6574 }
6575
6576 if (!is_hbd) {
6577 const uint8_t *src = x->plane[0].src.buf;
6578
6579 for (int row = 0; row < bh; ++row) {
6580 for (int col = 0; col < bw; ++col) {
6581 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6582 }
6583 wsrc_buf += bw;
6584 src += x->plane[0].src.stride;
6585 }
6586 } else {
6587 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6588
6589 for (int row = 0; row < bh; ++row) {
6590 for (int col = 0; col < bw; ++col) {
6591 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6592 }
6593 wsrc_buf += bw;
6594 src += x->plane[0].src.stride;
6595 }
6596 }
6597 }
6598