1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73
#define LAST_NEW_MV_INDEX 6

// Mode-threshold multiplication factor table used by
// prune_inter_modes_if_skippable. The values are stored in Q12 fixed-point
// format and were derived from the equation:
//   (2.5 - ((float)x->qindex / MAXQ) * 1.5)
// i.e. the factor decreases linearly from 2.5 (qindex 0) to 1.0 (qindex MAXQ).
#define MODE_THRESH_QBITS 12
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106
// Default evaluation order for the RD mode search. Single-reference inter
// modes come first (grouped by prediction mode), then compound modes (grouped
// by reference-frame pair), and intra modes last.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single reference: NEARESTMV for each reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single reference: NEWMV for each reference frame.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single reference: NEARMV for each reference frame.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single reference: GLOBALMV for each reference frame.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST for every supported reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped by reference pair: (LAST, BWDREF).
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // (LAST, ALTREF).
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // (LAST2, ALTREF).
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // (LAST3, ALTREF).
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // (GOLDEN, ALTREF).
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // (LAST2, BWDREF).
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // (LAST3, BWDREF).
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // (GOLDEN, BWDREF).
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // (LAST, ALTREF2).
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // (LAST2, ALTREF2).
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // (LAST3, ALTREF2).
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // (GOLDEN, ALTREF2).
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Same-side (unidirectional) pairs: (LAST, LAST2).
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // (LAST, LAST3).
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // (LAST, GOLDEN).
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // (BWDREF, ALTREF).
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, roughly ordered by likelihood of selection.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299
/*!\cond */
// Result of evaluating one single-reference inter mode in one prediction
// direction.
typedef struct SingleInterModeState {
  int64_t rd;                     // RD cost of this mode/reference combination.
  MV_REFERENCE_FRAME ref_frame;   // Reference frame the RD cost belongs to.
  int valid;                      // Nonzero once `rd` has been populated.
} SingleInterModeState;

// Running state of the inter mode RD search for one block: the best mode
// found so far plus per-mode / per-reference bookkeeping used for pruning.
typedef struct InterModeSearchState {
  int64_t best_rd;                // Best overall RD cost seen so far.
  int64_t best_skip_rd[2];        // Best skip-path RD costs.
  MB_MODE_INFO best_mbmode;       // Mode info of the current best candidate.
  int best_rate_y;                // Luma rate of the best candidate.
  int best_rate_uv;               // Chroma rate of the best candidate.
  int best_mode_skippable;        // Whether the best mode can skip the txfm.
  int best_skip2;
  THR_MODES best_mode_index;      // Index of the best mode in the mode order.
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];  // Per-mode RD pruning thresholds.
  int64_t best_intra_rd;          // Best RD among intra modes tried so far.
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Model-estimated RD per mode / ref_mv index / reference frame.
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // State of the intra mode search.
  RD_STATS best_y_rdcost;         // RD stats of the best luma candidate.
} InterModeSearchState;
/*!\endcond */
348
// Reset every per-block-size inter mode RD model in the tile to its
// untrained state so that data collection starts from scratch.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361
// Estimate the residue rate and distortion of a block from its SSE using the
// fitted linear model for this block size. Returns 1 and fills the outputs
// when the model is trained, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  if (sse < model->dist_mean) {
    // SSE already below the mean distortion: predict a free, lossless block.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp the estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    // Slope is effectively zero; fall back to the clamp value.
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - model->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  if (*est_residue_cost <= 0) {
    // A non-positive rate estimate means the residue is not worth coding.
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
// Fit (or refresh) the per-block-size linear rate models ld = a * sse + b
// from the samples accumulated since the previous fit.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;  // Unused; kept for interface compatibility.
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    InterModeRdModel *const md = &tile_data->inter_mode_rd_models[bsize];
    // Require 200 samples to bootstrap a model, but only 64 to refresh one.
    const int min_samples = md->ready ? 64 : 200;
    if (md->num < min_samples) continue;

    if (!md->ready) {
      // First fit: use plain sample means.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Refresh: blend the old means with the new window (weight 3:1).
      const double factor = 3;
      md->dist_mean =
          (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
      md->ld_mean =
          (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
      md->sse_mean =
          (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
          (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
          (factor + 1);
    }

    // Least-squares slope/intercept from the accumulated moments.
    const double my = md->ld_mean;
    const double mx = md->sse_mean;
    const double dx = sqrt(md->sse_sse_mean);
    const double dxy = md->sse_ld_mean;

    md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    md->b = my - md->a * mx;
    md->ready = 1;

    // Restart accumulation for the next refresh window.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447 BLOCK_SIZE bsize, int64_t sse,
448 int64_t dist, int residue_cost) {
449 if (residue_cost == 0 || sse == dist) return;
450 const int block_idx = inter_mode_data_block_idx(bsize);
451 if (block_idx == -1) return;
452 InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453 if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454 const double ld = (sse - dist) * 1. / residue_cost;
455 ++rd_model->num;
456 rd_model->dist_sum += dist;
457 rd_model->ld_sum += ld;
458 rd_model->sse_sum += sse;
459 rd_model->sse_sse_sum += (double)sse * (double)sse;
460 rd_model->sse_ld_sum += sse * ld;
461 }
462 }
463
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465 int mode_rate, int64_t sse,
466 int64_t rd, RD_STATS *rd_cost,
467 RD_STATS *rd_cost_y,
468 RD_STATS *rd_cost_uv,
469 const MB_MODE_INFO *mbmi) {
470 const int num = inter_modes_info->num;
471 assert(num < MAX_INTER_MODES);
472 inter_modes_info->mbmi_arr[num] = *mbmi;
473 inter_modes_info->mode_rate_arr[num] = mode_rate;
474 inter_modes_info->sse_arr[num] = sse;
475 inter_modes_info->est_rd_arr[num] = rd;
476 inter_modes_info->rd_cost_arr[num] = *rd_cost;
477 inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478 inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479 ++inter_modes_info->num;
480 }
481
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484 // To avoid inconsistency in qsort() ordering when two elements are equal,
485 // using idx as tie breaker. Refer aomedia:2928
486 if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487 return 0;
488 else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489 return 1;
490 else
491 return -1;
492 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493 return 1;
494 } else {
495 return -1;
496 }
497 }
498
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500 const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501 if (inter_modes_info->num == 0) {
502 return;
503 }
504 for (int i = 0; i < inter_modes_info->num; ++i) {
505 rd_idx_pair_arr[i].idx = i;
506 rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507 }
508 qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509 compare_rd_idx_pair);
510 }
511
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)514 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515 int width, int height, float *hcorr,
516 float *vcorr) {
517 // The following notation is used:
518 // x - current pixel
519 // y - left neighbor pixel
520 // z - top neighbor pixel
521 int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522 int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523 int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524
525 // First, process horizontal correlation on just the first row
526 x_sum += diff[0];
527 x2_sum += diff[0] * diff[0];
528 x_firstrow += diff[0];
529 x2_firstrow += diff[0] * diff[0];
530 for (int j = 1; j < width; ++j) {
531 const int16_t x = diff[j];
532 const int16_t y = diff[j - 1];
533 x_sum += x;
534 x_firstrow += x;
535 x2_sum += x * x;
536 x2_firstrow += x * x;
537 xy_sum += x * y;
538 }
539
540 // Process vertical correlation in the first column
541 x_firstcol += diff[0];
542 x2_firstcol += diff[0] * diff[0];
543 for (int i = 1; i < height; ++i) {
544 const int16_t x = diff[i * stride];
545 const int16_t z = diff[(i - 1) * stride];
546 x_sum += x;
547 x_firstcol += x;
548 x2_sum += x * x;
549 x2_firstcol += x * x;
550 xz_sum += x * z;
551 }
552
553 // Now process horiz and vert correlation through the rest unit
554 for (int i = 1; i < height; ++i) {
555 for (int j = 1; j < width; ++j) {
556 const int16_t x = diff[i * stride + j];
557 const int16_t y = diff[i * stride + j - 1];
558 const int16_t z = diff[(i - 1) * stride + j];
559 x_sum += x;
560 x2_sum += x * x;
561 xy_sum += x * y;
562 xz_sum += x * z;
563 }
564 }
565
566 for (int j = 0; j < width; ++j) {
567 x_finalrow += diff[(height - 1) * stride + j];
568 x2_finalrow +=
569 diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570 }
571 for (int i = 0; i < height; ++i) {
572 x_finalcol += diff[i * stride + width - 1];
573 x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574 }
575
576 int64_t xhor_sum = x_sum - x_finalcol;
577 int64_t xver_sum = x_sum - x_finalrow;
578 int64_t y_sum = x_sum - x_firstcol;
579 int64_t z_sum = x_sum - x_firstrow;
580 int64_t x2hor_sum = x2_sum - x2_finalcol;
581 int64_t x2ver_sum = x2_sum - x2_finalrow;
582 int64_t y2_sum = x2_sum - x2_firstcol;
583 int64_t z2_sum = x2_sum - x2_firstrow;
584
585 const float num_hor = (float)(height * (width - 1));
586 const float num_ver = (float)((height - 1) * width);
587
588 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590
591 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593
594 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596
597 if (xhor_var_n > 0 && y_var_n > 0) {
598 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599 *hcorr = *hcorr < 0 ? 0 : *hcorr;
600 } else {
601 *hcorr = 1.0;
602 }
603 if (xver_var_n > 0 && z_var_n > 0) {
604 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605 *vcorr = *vcorr < 0 ? 0 : *vcorr;
606 } else {
607 *vcorr = 1.0;
608 }
609 }
610
// Sum the source-vs-prediction SSE over all coded planes of the current
// block. Optionally reports the luma-only SSE through sse_y. The total is
// scaled up by 16 (<< 4) before being returned.
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
                       int64_t *sse_y) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  int64_t total_sse = 0;

  for (int plane = 0; plane < num_planes; ++plane) {
    // Chroma planes only contribute when this block carries chroma info.
    if (plane && !xd->is_chroma_ref) break;
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x,
                             pd->subsampling_y);
    unsigned int plane_sse;
    cpi->ppi->fn_ptr[plane_bsize].vf(p->src.buf, p->src.stride, pd->dst.buf,
                                     pd->dst.stride, &plane_sse);
    if (plane == 0 && sse_y != NULL) *sse_y = plane_sse;
    total_sse += plane_sse;
  }
  return total_sse << 4;
}
634
// Compute the squared error between the original and dequantized transform
// coefficients, and report the sum of squared original coefficients in *ssz.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    // Form the products in 64 bits: large transform coefficients can make a
    // 32-bit `diff * diff` or `coeff * coeff` overflow, which is undefined
    // behavior for signed int. This mirrors the 64-bit accumulation already
    // used by av1_highbd_block_error_c below.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
649
// Low-precision variant of av1_block_error_c: squared error between 16-bit
// original and dequantized coefficient arrays (no sum-of-squares output).
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    // The difference of two int16_t values can reach +/-65535, whose square
    // (up to 4294836225) overflows 32-bit int — undefined behavior. Widen to
    // 64 bits before squaring.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
661
662 #if CONFIG_AV1_HIGHBITDEPTH
// High bit depth variant of av1_block_error_c: accumulates the squared
// coefficient error and sum of squared coefficients in 64 bits, then shifts
// both back by 2 * (bd - 8) with rounding so the result is normalized to
// 8-bit precision.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0, sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
683 #endif
684
// Decide whether a diagonal intra mode can be skipped: each of the four
// "in-between" directional modes is only worth searching when the best intra
// mode found so far is one of its two neighboring prediction directions.
// Returns 1 to skip `mode`, 0 to evaluate it.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701
// Return the rate cost of signaling the given inter prediction mode under
// the given mode context. Compound modes use a single table lookup; single
// modes walk the NEWMV / GLOBALMV / NEAR-NEAREST signaling tree, adding the
// cost of each decision.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 0: is it NEWMV?
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];
  int cost = mode_costs->newmv_mode_cost[ctx][1];

  // Level 1: is it GLOBALMV?
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];
  cost += mode_costs->zeromv_mode_cost[ctx][1];

  // Level 2: NEARESTMV vs NEARMV.
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
}
732
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734 int ref_idx) {
735 return ref_idx ? compound_ref1_mode(this_mode)
736 : compound_ref0_mode(this_mode);
737 }
738
// Fill ref_costs_single[ref] and ref_costs_comp[ref0][ref1] with the rate
// cost (from the entropy-coding cost tables) of signaling each reference
// frame choice in the current context. When the segment fixes the reference
// frame (SEG_LVL_REF_FRAME), no bits are spent, so all costs are zeroed.
static AOM_INLINE void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: signaling is free.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // All inter refs first pay the cost of signaling "not intra".
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional pairs pay for signaling the "bidirectional" compound
      // reference type; the cost is attached to the forward side.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame; use a nominal
      // placeholder cost for the (unused) compound entries.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
900
// Snapshot the current mode decision into the pick-mode context so it can be
// restored later if this coding path is ultimately selected for the block.
static AOM_INLINE void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  // The winning mode index is only tracked for internal-statistics builds.
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  // Copy the full mode info plus the per-reference MV stack/weights needed to
  // re-encode this block exactly as searched.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
921
// Sets up the prediction buffers for `ref_frame` and builds its candidate MV
// list. Temporarily points the pred block at the scaled reference (when one
// exists) because av1_mv_pred() does not support scaling, then restores the
// unscaled reference before returning.
static AOM_INLINE void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  // Non-NULL only when this reference needs rescaling to the coded size.
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
966
// Border margins in 1/8-pel units: keep clamped MVs far enough inside the
// allocated frame border that interpolation never reads outside it.
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969
970 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)971 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972 const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975 xd->mb_to_bottom_edge +
976 RIGHT_BOTTOM_MARGIN };
977 clamp_mv(mv, &mv_limits);
978 }
979
980 /* If the current mode shares the same mv with other modes with higher cost,
981 * skip this mode. */
/* If the current single-reference mode produces the same motion vector as a
 * cheaper, already-searched mode, reuse that mode's modelled RD and skip the
 * current one. Returns 1 to skip, 0 otherwise. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  // Only single-reference modes can be deduplicated here.
  if (ref_frames[1] > INTRA_FRAME) return 0;

  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int gm_translational =
      cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION;

  // Determine which previously searched mode yields the identical MV.
  PREDICTION_MODE dup_mode = MB_MODE_COUNT;
  switch (this_mode) {
    case NEARMV:
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV.
        dup_mode = NEARESTMV;
      } else if (ref_mv_count == 1 && gm_translational) {
        // NEARMV has the same motion vector as GLOBALMV.
        dup_mode = GLOBALMV;
      }
      break;
    case GLOBALMV:
      if (ref_mv_count == 0 && gm_translational) {
        // GLOBALMV has the same motion vector as NEARESTMV.
        dup_mode = NEARESTMV;
      } else if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV.
        dup_mode = NEARMV;
      }
      break;
    default: break;
  }
  if (dup_mode == MB_MODE_COUNT) return 0;

  // modelled_rd == INT64_MAX means the duplicate mode was never searched.
  const int64_t dup_rd = search_state->modelled_rd[dup_mode][0][ref_frames[0]];
  if (dup_rd == INT64_MAX) return 0;

  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
  const int dup_cost = cost_mv_ref(&x->mode_costs, dup_mode, mode_ctx);
  const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

  // Only skip if the current mode costs more to signal than the duplicate.
  if (this_cost > dup_cost) {
    search_state->modelled_rd[this_mode][0][ref_frames[0]] = dup_rd;
    return 1;
  }
  return 0;
}
1037
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039 const AV1_COMMON *cm,
1040 const MACROBLOCK *x) {
1041 const MACROBLOCKD *const xd = &x->e_mbd;
1042 *out_mv = in_mv;
1043 lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044 cm->features.cur_frame_force_integer_mv);
1045 clamp_mv2(&out_mv->as_mv, xd);
1046 return av1_is_fullmv_in_range(&x->mv_limits,
1047 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049
1050 // To use single newmv directly for compound modes, need to clamp the mv to the
1051 // valid mv range. Without this, encoder would generate out of range mv, and
1052 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1053 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054 int ref_idx) {
1055 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056 SubpelMvLimits mv_limits;
1057
1058 av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059 clamp_mv(&mv->as_mv, &mv_limits);
1060 }
1061
// Obtains the NEWMV component(s) for the current mode. Compound modes reuse
// MVs cached from earlier single-reference searches (clamped into range) and
// recompute the MV signaling rate; single-reference modes run a fresh motion
// search and cache its result for later compound reuse.
// Returns 0 on success, INT64_MAX when the mode should be skipped (search
// produced an invalid MV, or this ref_mv_idx was flagged to skip).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Map an absent second reference to index 0 so refs[] is always a valid
  // array index.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: seed each from the cached single-ref result.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Rate is the sum of both MV differences' signaling costs.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference's MV is NEW.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first reference's MV is NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: bound the search range using the MV found for the
    // closest previously searched ref_mv_idx of this reference.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev distance between the current and previous ref MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          // Range = ref-MV distance plus how far the previous search moved
          // from its own ref MV.
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it later.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1166
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1167 static INLINE void update_mode_start_end_index(
1168 const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1169 int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1170 int interintra_allowed, int eval_motion_mode) {
1171 *mode_index_start = (int)SIMPLE_TRANSLATION;
1172 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1173 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1174 if (!eval_motion_mode) {
1175 *mode_index_end = (int)SIMPLE_TRANSLATION;
1176 } else {
1177 // Set the start index appropriately to process motion modes other than
1178 // simple translation
1179 *mode_index_start = 1;
1180 }
1181 }
1182 if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1183 *mode_index_end = SIMPLE_TRANSLATION;
1184 }
1185
1186 /*!\brief AV1 motion mode search
1187 *
1188 * \ingroup inter_mode_search
1189 * Function to search over and determine the motion mode. It will update
1190 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1191 * WARPED_CAUSAL and determine any necessary side information for the selected
1192 * motion mode. It will also perform the full transform search, unless the
1193 * input parameter do_tx_search indicates to do an estimation of the RD rather
1194 * than an RD corresponding to a full transform search. It will return the
1195 * RD for the final motion_mode.
1196 * Do the RD search for a given inter mode and compute all information relevant
1197 * to the input mode. It will compute the best MV,
1198 * compound parameters (if the mode is a compound mode) and interpolation filter
1199 * parameters.
1200 *
1201 * \param[in] cpi Top-level encoder structure.
1202 * \param[in] tile_data Pointer to struct holding adaptive
1203 * data/contexts/models for the tile during
1204 * encoding.
1205 * \param[in] x Pointer to struct holding all the data for
1206 * the current macroblock.
1207 * \param[in] bsize Current block size.
1208 * \param[in,out] rd_stats Struct to keep track of the overall RD
1209 * information.
1210 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1211 * for only the Y plane.
1212 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1213 * for only the UV planes.
1214 * \param[in] args HandleInterModeArgs struct holding
1215 * miscellaneous arguments for inter mode
1216 * search. See the documentation for this
1217 * struct for a description of each member.
1218 * \param[in] ref_best_rd Best RD found so far for this block.
1219 * It is used for early termination of this
1220 * search if the RD exceeds this value.
1221 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1222 * best total RD for a skip mode so far, and
1223 * skip_rd[1] is the best RD for a skip mode so
1224 * far in luma. This is used as a speed feature
1225 * to skip the transform search if the computed
1226 * skip RD for the current mode is not better
1227 * than the best skip_rd so far.
1228 * \param[in,out] rate_mv The rate associated with the motion vectors.
1229 * This will be modified if a motion search is
1230 * done in the motion mode search.
1231 * \param[in,out] orig_dst A prediction buffer to hold a computed
1232 * prediction. This will eventually hold the
1233 * final prediction, and the tmp_dst info will
1234 * be copied here.
1235 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1236 * do_tx_search (see below) is 0.
1237 * \param[in] do_tx_search Parameter to indicate whether or not to do
1238 * a full transform search. This will compute
1239 * an estimated RD for the modes without the
1240 * transform search and later perform the full
1241 * transform search on the best candidates.
1242 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1243 * information to perform a full transform
1244 * search only on winning candidates searched
1245 * with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1248 * \param[out] yrd Stores the rdcost corresponding to encoding
1249 * the luma plane.
1250 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1251 * current motion mode being tested should be skipped. It returns 0 if the
1252 * motion mode search is a success.
1253 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Mode rate before any residual coding, common to all motion modes.
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    // num < 0 marks the cache as not yet populated for this reference.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  // Keep a pristine copy of the input mbmi; each motion-mode iteration starts
  // from it.
  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    // Indices past last_motion_mode_allowed select interintra prediction.
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    int use_actual_frame_probs = 1;
    int prune_obmc;
#if CONFIG_FPMT_TEST
    use_actual_frame_probs =
        (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
    if (!use_actual_frame_probs) {
      prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
#endif
    if (use_actual_frame_probs) {
      prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                   cpi->sf.inter_sf.prune_obmc_prob_thresh;
    }
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        // Redo the motion search with OBMC-specific distortion.
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      // Warped prediction uses a fixed (non-switchable) interp filter.
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        // Least-squares fit failed; WARPED_CAUSAL cannot be evaluated.
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    // WARPED_CAUSAL uses a fixed filter, so no switchable-filter rate.
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      // Signal the motion mode; the symbol set depends on whether
      // WARPED_CAUSAL is a possible choice.
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Prune when the estimate is clearly worse than the best estimate seen.
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      // Record candidates for a later full transform search. Compound modes
      // are excluded when the frame is restricted to single reference.
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (cpi->sf.inter_sf.txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        cpi->sf.inter_sf.txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        // If even the first (simple translation) mode has no valid luma rate,
        // the whole mode is unusable.
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      // Compute luma-only RD including the skip-txfm signaling cost.
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        // Feed the observed rate/distortion back into the RD model.
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  // Restore the winning mode's state before returning.
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1635
// Computes the RD stats for coding this block in skip mode: builds the
// skip-mode inter prediction, measures the resulting SSE over all planes,
// and charges only the skip-mode flag as rate. The prediction buffers are
// restored before returning. Always returns 0.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  // Build the skip-mode predictor for every plane into the destination
  // buffers before measuring distortion.
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, num_planes - 1);

  int64_t total_sse = 0;
  for (int plane_idx = 0; plane_idx < num_planes; ++plane_idx) {
    const struct macroblock_plane *const mb_plane = &x->plane[plane_idx];
    const struct macroblockd_plane *const mbd_plane = &xd->plane[plane_idx];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(
        bsize, mbd_plane->subsampling_x, mbd_plane->subsampling_y);
    const int width = block_size_wide[plane_bsize];
    const int height = block_size_high[plane_bsize];

    av1_subtract_plane(x, plane_bsize, plane_idx);
    // Scale the pixel-domain SSE by 16 (same scaling as used elsewhere for
    // RDCOST distortion), then compensate for high bit depths.
    int64_t plane_sse =
        aom_sum_squares_2d_i16(mb_plane->src_diff, width, width, height) << 4;
    plane_sse >>= ((cpi->frame_info.bit_depth - 8) * 2);
    total_sse += plane_sse;
  }
  rd_stats->dist = rd_stats->sse = total_sse;
  // The only rate charged is the skip-mode flag itself.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1670
// Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the
// relevant mode.
// Returns 1 if 'single_mode' (for reference slot 'ref_idx') would code a
// motion vector that duplicates one produced by another mode, so the caller
// can skip evaluating it; returns 0 otherwise.
static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
                                      int ref_idx,
                                      const MV_REFERENCE_FRAME *ref_frame,
                                      PREDICTION_MODE single_mode) {
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  assert(single_mode != NEWMV);
  if (single_mode == NEARESTMV) {
    // NEARESTMV is never pruned here; other modes are pruned against it.
    return 0;
  } else if (single_mode == NEARMV) {
    // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
    // when ref_mv_count = 1, NEARMV is same as GLOBALMV
    if (ref_mv_count < 2) return 1;
  } else if (single_mode == GLOBALMV) {
    // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
    if (ref_mv_count == 0) return 1;
    // when ref_mv_count == 1, NEARMV is same as GLOBALMV
    else if (ref_mv_count == 1)
      return 0;

    // Only the first USABLE_REF_MV_STACK_SIZE stack entries are reachable,
    // so restrict the duplicate scan to those.
    int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
    // Check GLOBALMV is matching with any mv in ref_mv_stack
    for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
      int_mv this_mv;

      // ref_idx selects which half of a (possibly compound) candidate to
      // compare against.
      if (ref_idx == 0)
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
      else
        this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;

      if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
        return 1;
    }
  }
  return 0;
}
1709
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1710 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1711 int ref_idx, int ref_mv_idx,
1712 int skip_repeated_ref_mv,
1713 const MV_REFERENCE_FRAME *ref_frame,
1714 const MB_MODE_INFO_EXT *mbmi_ext) {
1715 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1716 assert(is_inter_singleref_mode(single_mode));
1717 if (single_mode == NEWMV) {
1718 this_mv->as_int = INVALID_MV;
1719 } else if (single_mode == GLOBALMV) {
1720 if (skip_repeated_ref_mv &&
1721 check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1722 return 0;
1723 *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1724 } else {
1725 assert(single_mode == NEARMV || single_mode == NEARESTMV);
1726 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1727 const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1728 if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1729 assert(ref_mv_offset >= 0);
1730 if (ref_idx == 0) {
1731 *this_mv =
1732 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1733 } else {
1734 *this_mv =
1735 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1736 }
1737 } else {
1738 if (skip_repeated_ref_mv &&
1739 check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1740 return 0;
1741 *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1742 }
1743 }
1744 return 1;
1745 }
1746
1747 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1748 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type)1749 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1750 const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1751 const int8_t ref_frame_type) {
1752 if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1753
1754 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1755 const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1756 const int ref_mv_count =
1757 AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1758
1759 if (ref_mv_count == 0) return 0;
1760 // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1761 if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1762
1763 // Count number of ref mvs populated from nearest candidates
1764 int nearest_refmv_count = 0;
1765 for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1766 if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1767 }
1768
1769 // nearest_refmv_count indicates the closeness of block motion characteristics
1770 // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1771 // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1772 // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1773 // mode since these modes work well for blocks that shares similar motion
1774 // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1775 // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1776 // mode is pruned if none of the ref mvs are populated from nearest candidate.
1777 const int prune_thresh = 1 + (ref_mv_count >= 2);
1778 if (nearest_refmv_count < prune_thresh) return 1;
1779 return 0;
1780 }
1781
// This function updates the non-new mv for the current prediction mode.
// Returns 1 on success; 0 when a candidate should be skipped (it duplicates
// another mode's mv) or the clamped mv is unusable, in which case the caller
// can skip the mode entirely.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  // One iteration per reference: two for compound prediction, one otherwise.
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // For NEWMV components, take the mv from the DRL stack entry selected
      // by ref_mv_idx (no clamping is applied here).
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Clamp the predicted mv and reject it if the check fails.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
1811
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1812 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1813 const MB_MODE_INFO_EXT *mbmi_ext,
1814 const int (*const drl_mode_cost0)[2],
1815 int8_t ref_frame_type) {
1816 int cost = 0;
1817 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1818 for (int idx = 0; idx < 2; ++idx) {
1819 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1820 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1821 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1822 if (mbmi->ref_mv_idx == idx) return cost;
1823 }
1824 }
1825 return cost;
1826 }
1827
1828 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1829 for (int idx = 1; idx < 3; ++idx) {
1830 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1831 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1832 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1833 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1834 }
1835 }
1836 return cost;
1837 }
1838 return cost;
1839 }
1840
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1841 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1842 const MB_MODE_INFO *const mbmi,
1843 PREDICTION_MODE this_mode) {
1844 for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1845 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1846 const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1847 if (single_mode == NEWMV &&
1848 args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1849 return 0;
1850 }
1851 }
1852 return 1;
1853 }
1854
// Returns the number of ref_mv_idx values (DRL candidates) to search for the
// given mode and reference pair. DRL signalling only applies when the stack
// holds more candidates than the mode's implicit first choice; otherwise a
// single index is searched.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];

  // Modes with a NEAR component skip stack slot 0 (taken by NEARESTMV).
  if (have_nearmv_in_inter_mode(mode) && ref_mv_count > 2)
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - 1);
  // Pure NEWMV modes can use every stack slot.
  if ((mode == NEWMV || mode == NEW_NEWMV) && ref_mv_count > 1)
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count);
  return 1;
}
1870
1871 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1872 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1873 const int qindex,
1874 const int ref_mv_idx) {
1875 if (reduce_inter_modes >= 3) return 1;
1876 // Q-index logic based pruning is enabled only for
1877 // reduce_inter_modes = 2.
1878 assert(reduce_inter_modes == 2);
1879 // When reduce_inter_modes=2, pruning happens as below based on q index.
1880 // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1881 // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1882 // For q index range between 171 and 255: no pruning.
1883 const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1884 return (ref_mv_idx >= min_prune_ref_mv_idx);
1885 }
1886
// Whether this reference motion vector can be skipped, based on initial
// heuristics.
// Returns true when searching 'ref_mv_idx' can be skipped for the current
// mode/reference combination. Side effect: commits ref_mv_idx to
// mbmi->ref_mv_idx so that the DRL cost computed below corresponds to the
// candidate being evaluated.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // For LAST2/LAST3 references, prune candidates whose DRL weight is below
    // REF_CAT_LEVEL (i.e. not populated from a nearest neighbour).
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      // Single-ref NEWMV on a reference that is not the nearest past/future
      // one: additionally apply q-index based pruning of higher indices.
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Estimate the minimum rate of this candidate (mode signalling + DRL index)
  // and break out if even a zero-distortion outcome cannot beat the best RD.
  // NEARESTMV / NEAREST_NEARESTMV are exempt from this breakout.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1941
// Compute the estimated RD cost for the motion vector with simple translation.
// Builds only the luma predictor with SIMPLE_TRANSLATION and estimates
// rate/distortion with the curve-fit model instead of a full transform
// search. Returns INT64_MAX if the mv cannot be built or the signalling rate
// alone already exceeds ref_best_rd.
// Note: mutates mbmi (motion_mode, ref_mv_idx, compound settings, mv,
// interp filters) as a side effect.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Record the current destination buffers as the prediction target.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Use the simplest compound configuration (plain average) for the estimate.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Signalling rate: reference frame + single/compound flag + DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early out: if the rate alone exceeds the best RD, this mode cannot win.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Build the luma predictor only and estimate rate/distortion with the
  // curve-fit model.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
2016
// Represents a set of integers, from 0 to sizeof(int) * 8, as bits in an
// integer. A 0 in the i-th bit means that integer is excluded, 1 means it is
// included. This helper adds 'index' to the set.
static inline void mask_set_bit(int *mask, int index) {
  const int bit = 1 << index;
  *mask |= bit;
}
2021
// Returns true when 'index' is a member of the bit-set 'mask'
// (companion to mask_set_bit above).
static inline bool mask_check_bit(int mask, int index) {
  return ((mask >> index) & 1) != 0;
}
2025
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns an integer where, if the i-th bit is set, it means that the i-th
// motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // One entry per possible ref_mv_idx (MAX_REF_MV_SEARCH entries).
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction tolerates a wider spread than single prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2097
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;

/*!\cond */
// Collection of the best SIMPLE_TRANSLATION winner candidates on which the
// remaining motion modes are evaluated (see the speed feature described for
// motion_mode_candidate above).
typedef struct motion_mode_best_st_candidate {
  // Candidate list, at most MAX_WINNER_MOTION_MODES entries.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2133
2134 // Checks if the current reference frame matches with neighbouring block's
2135 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2136 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2137 MB_MODE_INFO *nb_mbmi) {
2138 MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2139 nb_mbmi->ref_frame[1] };
2140 MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2141 cur_mbmi->ref_frame[1] };
2142 const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2143 int match_found = 0;
2144
2145 for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2146 if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2147 (cur_ref_frames[i] == nb_ref_frames[1]))
2148 match_found = 1;
2149 }
2150 return match_found;
2151 }
2152
// Scans the row of blocks immediately above the current block and returns 1
// if any inter-coded neighbour shares a reference frame with the current
// block. Returns 1 unconditionally when no row above is available.
static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
                                                  MACROBLOCKD *xd) {
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the above row one neighbour block at a time; mi_step is the width
  // (in mi units) of the neighbour just examined.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    // Only inter blocks carry reference frames to compare against.
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2174
// Scans the column of blocks immediately left of the current block and
// returns 1 if any inter-coded neighbour shares a reference frame with the
// current block. Returns 1 unconditionally when no left column is available.
static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
                                                 MACROBLOCKD *xd) {
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk down the left column one neighbour block at a time; mi_step is the
  // height (in mi units) of the neighbour just examined.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    // Only inter blocks carry reference frames to compare against.
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2196 /*!\endcond */
2197
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, accumulated from the per-block
   * TPL prediction errors (see get_block_level_tpl_stats()). A value of 0
   * indicates an invalid/unpopulated reference.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2211
2212 #if !CONFIG_REALTIME_ONLY
2213 // TODO(Remya): Check if get_tpl_stats_b() can be reused
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, per reference frame, the TPL prediction errors of all TPL
// units covered by the current block into 'inter_cost_info_from_tpl', then
// records the smallest non-zero total among 'valid_refs' as best_inter_cost.
// Returns without doing anything if TPL stats are not ready for this frame.
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // TPL stats are stored at a granularity of 'step' mi units.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted to the superres-upscaled mi domain, in
  // which the TPL stats are laid out.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2267 #endif
2268
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2269 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2270 PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2271 const PREDICTION_MODE this_mode, int prune_mode_level) {
2272 const int have_newmv = have_newmv_in_inter_mode(this_mode);
2273 if ((prune_mode_level < 2) && have_newmv) return 0;
2274
2275 const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2276 if (best_inter_cost == INT64_MAX) return 0;
2277
2278 const int prune_level = prune_mode_level - 1;
2279 int64_t cur_inter_cost;
2280
2281 const int is_globalmv =
2282 (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2283 const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2284
2285 // Thresholds used for pruning:
2286 // Lower value indicates aggressive pruning and higher value indicates
2287 // conservative pruning which is set based on ref_mv_idx and speed feature.
2288 // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2289 // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2290 static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2291 { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2292 };
2293
2294 const int is_comp_pred = (refs[1] > INTRA_FRAME);
2295 if (!is_comp_pred) {
2296 cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2297 } else {
2298 const int64_t inter_cost_ref0 =
2299 inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2300 const int64_t inter_cost_ref1 =
2301 inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2302 // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2303 // more aggressive pruning
2304 cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2305 }
2306
2307 // Prune the mode if cur_inter_cost is greater than threshold times
2308 // best_inter_cost
2309 if (cur_inter_cost >
2310 ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2311 best_inter_cost) >>
2312 2))
2313 return 1;
2314 return 0;
2315 }
2316
2317 /*!\brief High level function to select parameters for compound mode.
2318 *
2319 * \ingroup inter_mode_search
2320 * The main search functionality is done in the call to av1_compound_type_rd().
2321 *
2322 * \param[in] cpi Top-level encoder structure.
2323 * \param[in] x Pointer to struct holding all the data for
2324 * the current macroblock.
2325 * \param[in] args HandleInterModeArgs struct holding
2326 * miscellaneous arguments for inter mode
2327 * search. See the documentation for this
2328 * struct for a description of each member.
2329 * \param[in] ref_best_rd Best RD found so far for this block.
2330 * It is used for early termination of this
2331 * search if the RD exceeds this value.
2332 * \param[in,out] cur_mv Current motion vector.
2333 * \param[in] bsize Current block size.
2334 * \param[in,out] compmode_interinter_cost RD of the selected interinter
2335 compound mode.
2336 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2337 * allocated buffers for the compound
2338 * predictors and masks in the compound type
2339 * search.
2340 * \param[in,out] orig_dst A prediction buffer to hold a computed
2341 * prediction. This will eventually hold the
2342 * final prediction, and the tmp_dst info will
2343 * be copied here.
2344 * \param[in] tmp_dst A temporary prediction buffer to hold a
2345 * computed prediction.
2346 * \param[in,out] rate_mv The rate associated with the motion vectors.
2347 * This will be modified if a motion search is
2348 * done in the motion mode search.
2349 * \param[in,out] rd_stats Struct to keep track of the overall RD
2350 * information.
2351 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2352 * best total RD for a skip mode so far, and
2353 * skip_rd[1] is the best RD for a skip mode so
2354 * far in luma. This is used as a speed feature
2355 * to skip the transform search if the computed
2356 * skip RD for the current mode is not better
2357 * than the best skip_rd so far.
2358 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2359 * predictor. If this is 0, the inter predictor
2360 * has already been built and thus we can avoid
2361 * repeating computation.
2362 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2363 * a viable candidate.
2364 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Masked (wedge/diffwtd) compound types are considered only when both the
  // block size and the sequence header allow them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Candidate set: all four inter-inter compound types.
  const int mode_search_mask = (1 << COMPOUND_AVERAGE) |
                               (1 << COMPOUND_DISTWTD) |
                               (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Early-termination threshold derived from the best RD found so far.
  const int64_t rd_thresh =
      get_rd_thresh_from_best_rd(ref_best_rd, (1 << COMP_TYPE_RD_THRESH_SHIFT),
                                 COMP_TYPE_RD_THRESH_SCALE);

  // Select the compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the resulting RD.
  int64_t best_rd_compound;
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);

  // Give up on this mode if its compound RD is already clearly worse than
  // the best mode seen so far.
  if (ref_best_rd < INT64_MAX) {
    const int64_t scaled_rd = (best_rd_compound >> COMP_TYPE_RD_THRESH_SHIFT) *
                              COMP_TYPE_RD_THRESH_SCALE;
    if (scaled_rd > ref_best_rd) {
      restore_dst_buf(xd, *orig_dst, num_planes);
      return 1;
    }
  }

  // COMPOUND_AVERAGE is the first candidate evaluated in
  // av1_compound_type_rd, so its luma prediction was written directly to
  // dst_buf (not tmp_buf). If it won, only the chroma planes still need to
  // be predicted here, and the luma build can be skipped by the caller.
  if (is_luma_interp_done && mbmi->interinter_comp.type == COMPOUND_AVERAGE) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2419
2420 // Speed feature to prune out MVs that are similar to previous MVs if they
2421 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2422 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2423 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2424 MB_MODE_INFO *mbmi, int pruning_factor) {
2425 int i;
2426 const int is_comp_pred = has_second_ref(mbmi);
2427 const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2428
2429 // Skip the evaluation if an MV match is found.
2430 if (ref_mv_idx > 0) {
2431 for (int idx = 0; idx < ref_mv_idx; ++idx) {
2432 if (save_mv[idx][0].as_int == INVALID_MV) continue;
2433
2434 int mv_diff = 0;
2435 for (i = 0; i < 1 + is_comp_pred; ++i) {
2436 mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2437 abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2438 }
2439
2440 // If this mode is not the best one, and current MV is similar to
2441 // previous stored MV, terminate this ref_mv_idx evaluation.
2442 if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2443 }
2444 }
2445
2446 if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2447 for (i = 0; i < is_comp_pred + 1; ++i)
2448 save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2449 }
2450
2451 return 0;
2452 }
2453
2454 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2455 *
2456 * \ingroup inter_mode_search
2457 *
2458 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2459 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2460 * Else returns 0.
2461 *
2462 * Note that the sse of here comes from single_motion_search. So it is
2463 * interpolated with the filter in motion search, not the actual interpolation
2464 * filter used in encoding.
2465 *
2466 * \param[in] fn_ptr A table of function pointers to compute SSE.
2467 * \param[in] x Pointer to struct holding all the data for
2468 * the current macroblock.
2469 * \param[in] bsize The current block_size.
2470 * \param[in] args The args to handle_inter_mode, used to track
2471 * the best SSE.
2472 * \param[in] prune_zero_mv_with_sse The argument holds speed feature
2473 * prune_zero_mv_with_sse value
2474 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2475 */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args,int prune_zero_mv_with_sse)2476 static AOM_INLINE int prune_zero_mv_with_sse(
2477 const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2478 const HandleInterModeArgs *args, int prune_zero_mv_with_sse) {
2479 const MACROBLOCKD *xd = &x->e_mbd;
2480 const MB_MODE_INFO *mbmi = xd->mi[0];
2481
2482 const int is_comp_pred = has_second_ref(mbmi);
2483 const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2484
2485 // Check that the global mv is the same as ZEROMV
2486 assert(mbmi->mv[0].as_int == 0);
2487 assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
2488 assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
2489 xd->global_motion[refs[0]].wmtype == IDENTITY);
2490
2491 // Don't prune if we have invalid data
2492 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2493 assert(mbmi->mv[0].as_int == 0);
2494 if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2495 return 0;
2496 }
2497 }
2498
2499 // Sum up the sse of ZEROMV and best NEWMV
2500 unsigned int this_sse_sum = 0;
2501 unsigned int best_sse_sum = 0;
2502 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2503 const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2504 const struct macroblockd_plane *pd = xd->plane;
2505 const struct buf_2d *src_buf = &p->src;
2506 const struct buf_2d *ref_buf = &pd->pre[idx];
2507 const uint8_t *src = src_buf->buf;
2508 const uint8_t *ref = ref_buf->buf;
2509 const int src_stride = src_buf->stride;
2510 const int ref_stride = ref_buf->stride;
2511
2512 unsigned int this_sse;
2513 fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2514 this_sse_sum += this_sse;
2515
2516 const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2517 best_sse_sum += best_sse;
2518 }
2519
2520 const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2521 if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2522 return 1;
2523 }
2524
2525 return 0;
2526 }
2527
/*!\brief Searches for interpolation filter in realtime mode during winner eval
 *
 * \ingroup inter_mode_search
 *
 * Does a simple interpolation filter search during winner mode evaluation. This
 * is currently only used by realtime mode as \ref
 * av1_interpolation_filter_search is not called during realtime encoding.
 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
 *
 * \param[in]     cpi          Pointer to the compressor. Used for feature
 *                             flags.
 * \param[in,out] x            Pointer to macroblock. This is primarily
 *                             used to access the buffers.
 * \param[in]     mi_row       The current row in mi unit (4X4 pixels).
 * \param[in]     mi_col       The current col in mi unit (4X4 pixels).
 * \param[in]     bsize        The current block_size.
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
 */
static AOM_INLINE bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          int mi_row, int mi_col,
                                          BLOCK_SIZE bsize) {
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // Nothing to search when the MV is full-pel: every filter is a no-op then.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  // Double-buffering: dst_bufs[0] receives the new predictor, and dst_bufs[1]
  // holds the best predictor found so far. xd->plane's dst pointers are kept
  // in sync with dst_bufs[0] via swap_dst_buf.
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Restrict the candidate set by resolution: skip SMOOTH for low-res,
    // skip SHARP for higher-res (see the function doc above).
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this candidate filter, then estimate its
    // RD with the model (no transform search).
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Keep this predictor as the best: after the swap, dst_bufs[1] holds it
      // and dst_bufs[0] becomes the scratch buffer for the next candidate.
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  // At least one filter is always evaluated, so an index must have been set.
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // Best predictor already lives in the original dst; just point xd back.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one, and the best predictor must be copied into it from the
    // temporary buffer.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the chroma predictors with the chosen filter (luma already done).
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2659
2660 /*!\brief AV1 inter mode RD computation
2661 *
2662 * \ingroup inter_mode_search
2663 * Do the RD search for a given inter mode and compute all information relevant
2664 * to the input mode. It will compute the best MV,
2665 * compound parameters (if the mode is a compound mode) and interpolation filter
2666 * parameters.
2667 *
2668 * \param[in] cpi Top-level encoder structure.
2669 * \param[in] tile_data Pointer to struct holding adaptive
2670 * data/contexts/models for the tile during
2671 * encoding.
2672 * \param[in] x Pointer to structure holding all the data
2673 * for the current macroblock.
2674 * \param[in] bsize Current block size.
2675 * \param[in,out] rd_stats Struct to keep track of the overall RD
2676 * information.
2677 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2678 * for only the Y plane.
2679 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2680 * for only the UV planes.
2681 * \param[in] args HandleInterModeArgs struct holding
2682 * miscellaneous arguments for inter mode
2683 * search. See the documentation for this
2684 * struct for a description of each member.
2685 * \param[in] ref_best_rd Best RD found so far for this block.
2686 * It is used for early termination of this
2687 * search if the RD exceeds this value.
2688 * \param[in] tmp_buf Temporary buffer used to hold predictors
2689 * built in this search.
2690 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2691 * allocated buffers for the compound
2692 * predictors and masks in the compound type
2693 * search.
2694 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2695 * do_tx_search (see below) is 0.
2696 * \param[in] do_tx_search Parameter to indicate whether or not to do
2697 * a full transform search. This will compute
2698 * an estimated RD for the modes without the
2699 * transform search and later perform the full
2700 * transform search on the best candidates.
2701 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2702 * information to perform a full transform
2703 * search only on winning candidates searched
2704 * with an estimate for transform coding RD.
2705 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2706 * motion mode information used in a speed
2707 * feature to search motion modes other than
2708 * SIMPLE_TRANSLATION only on winning
2709 * candidates.
2710 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2711 * best total RD for a skip mode so far, and
2712 * skip_rd[1] is the best RD for a skip mode so
2713 * far in luma. This is used as a speed feature
2714 * to skip the transform search if the computed
2715 * skip RD for the current mode is not better
2716 * than the best skip_rd so far.
2717 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2718 * narrow down the search based on data
2719 * collected in the TPL model.
2720 * \param[out] yrd Stores the rdcost corresponding to encoding
2721 * the luma plane.
2722 *
2723 * \return The RD cost for the mode being searched.
2724 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

  // TPL-based pruning is only available when TPL stats exist (non-realtime).
#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref
  // frame is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv_idx values worth a full search (from the quick pass).
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Signalling cost common to every ref_mv_idx of this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  // As per the experiments, in real-time preset impact of model rd based
  // breakouts is less on encoding time if the following conditions are true.
  // (1) compound mode is disabled
  // (2) interpolation filter search is disabled
  // TODO(any): Check the impact of model rd based breakouts in other presets
  const int skip_interp_search_modelrd_calc =
      cpi->oxcf.mode == REALTIME &&
      cm->current_frame.reference_mode == SINGLE_REFERENCE &&
      (cpi->sf.rt_sf.skip_interp_filter_search ||
       cpi->sf.winner_mode_sf.winner_mode_ifs);

  // Mark all saved-MV slots empty before the main loop fills them.
  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        // Track the best (lowest) single-ref SSE per reference; used later by
        // prune_zero_mv_with_sse and the NEWMV SSE-based skip below.
        const unsigned int this_sse = x->pred_sse[ref];
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Per-speed-level, per-block-size scale factors for the SSE skip
          // threshold (rows: speed level, cols: log2 pixel count - 4).
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early termination: the rate cost alone already exceeds the best RD.
    // NEAREST modes are exempted so at least one candidate survives.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    // Prune GLOBALMV candidates whose SSE is worse than the best NEWMV SSE
    // (valid only when global motion search is disabled, i.e. GM == ZEROMV).
    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!skip_interp_search_modelrd_calc) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        // Model RD is already ~1.5x worse than the best RD: break out early.
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          // Compare against the better of the two constituent single-ref
          // modes' modelled RDs; prune if this compound mode is much worse.
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the pruning threshold for subsequent ref_mv_idx iterations.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3087
3088 /*!\brief Search for the best intrabc predictor
3089 *
3090 * \ingroup intra_mode_search
3091 * \callergraph
3092 * This function performs a motion search to find the best intrabc predictor.
3093 *
3094 * \returns Returns the best overall rdcost (including the non-intrabc modes
3095 * search before this function).
3096 */
rd_pick_intrabc_mode_sb(const AV1_COMP * cpi,MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats,BLOCK_SIZE bsize,int64_t best_rd)3097 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3098 PICK_MODE_CONTEXT *ctx,
3099 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3100 int64_t best_rd) {
3101 const AV1_COMMON *const cm = &cpi->common;
3102 if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3103 cpi->sf.rt_sf.use_nonrd_pick_mode)
3104 return INT64_MAX;
3105 const int num_planes = av1_num_planes(cm);
3106
3107 MACROBLOCKD *const xd = &x->e_mbd;
3108 const TileInfo *tile = &xd->tile;
3109 MB_MODE_INFO *mbmi = xd->mi[0];
3110 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3111
3112 const int mi_row = xd->mi_row;
3113 const int mi_col = xd->mi_col;
3114 const int w = block_size_wide[bsize];
3115 const int h = block_size_high[bsize];
3116 const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3117 const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3118
3119 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3120 const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3121 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3122 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3123 mbmi_ext->mode_context);
3124 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3125 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3126 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3127 int_mv nearestmv, nearmv;
3128 av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3129 0);
3130
3131 if (nearestmv.as_int == INVALID_MV) {
3132 nearestmv.as_int = 0;
3133 }
3134 if (nearmv.as_int == INVALID_MV) {
3135 nearmv.as_int = 0;
3136 }
3137
3138 int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3139 if (dv_ref.as_int == 0) {
3140 av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3141 }
3142 // Ref DV should not have sub-pel.
3143 assert((dv_ref.as_mv.col & 7) == 0);
3144 assert((dv_ref.as_mv.row & 7) == 0);
3145 mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3146
3147 struct buf_2d yv12_mb[MAX_MB_PLANE];
3148 av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3149 for (int i = 0; i < num_planes; ++i) {
3150 xd->plane[i].pre[0] = yv12_mb[i];
3151 }
3152
3153 enum IntrabcMotionDirection {
3154 IBC_MOTION_ABOVE,
3155 IBC_MOTION_LEFT,
3156 IBC_MOTION_DIRECTIONS
3157 };
3158
3159 MB_MODE_INFO best_mbmi = *mbmi;
3160 RD_STATS best_rdstats = *rd_stats;
3161 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3162 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3163 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3164
3165 FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3166 const search_site_config *lookahead_search_sites =
3167 cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3168 av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3169 &dv_ref.as_mv, lookahead_search_sites,
3170 /*fine_search_interval=*/0);
3171 const IntraBCMVCosts *const dv_costs = x->dv_costs;
3172 av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3173
3174 for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3175 dir < IBC_MOTION_DIRECTIONS; ++dir) {
3176 switch (dir) {
3177 case IBC_MOTION_ABOVE:
3178 fullms_params.mv_limits.col_min =
3179 (tile->mi_col_start - mi_col) * MI_SIZE;
3180 fullms_params.mv_limits.col_max =
3181 (tile->mi_col_end - mi_col) * MI_SIZE - w;
3182 fullms_params.mv_limits.row_min =
3183 (tile->mi_row_start - mi_row) * MI_SIZE;
3184 fullms_params.mv_limits.row_max =
3185 (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3186 break;
3187 case IBC_MOTION_LEFT:
3188 fullms_params.mv_limits.col_min =
3189 (tile->mi_col_start - mi_col) * MI_SIZE;
3190 fullms_params.mv_limits.col_max =
3191 (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3192 // TODO(aconverse@google.com): Minimize the overlap between above and
3193 // left areas.
3194 fullms_params.mv_limits.row_min =
3195 (tile->mi_row_start - mi_row) * MI_SIZE;
3196 int bottom_coded_mi_edge =
3197 AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3198 fullms_params.mv_limits.row_max =
3199 (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3200 break;
3201 default: assert(0);
3202 }
3203 assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3204 assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3205 assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3206 assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3207
3208 av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3209
3210 if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3211 fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3212 continue;
3213 }
3214
3215 const int step_param = cpi->mv_search_params.mv_step_param;
3216 const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3217 IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3218 int_mv best_mv, best_hash_mv;
3219
3220 int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
3221 NULL, &best_mv.as_fullmv, NULL);
3222 const int hashsme = av1_intrabc_hash_search(
3223 cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3224 if (hashsme < bestsme) {
3225 best_mv = best_hash_mv;
3226 bestsme = hashsme;
3227 }
3228
3229 if (bestsme == INT_MAX) continue;
3230 const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3231 if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3232 get_fullmv_from_mv(&dv)))
3233 continue;
3234 if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3235 cm->seq_params->mib_size_log2))
3236 continue;
3237
3238 // DV should not have sub-pel.
3239 assert((dv.col & 7) == 0);
3240 assert((dv.row & 7) == 0);
3241 memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3242 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3243 mbmi->use_intrabc = 1;
3244 mbmi->mode = DC_PRED;
3245 mbmi->uv_mode = UV_DC_PRED;
3246 mbmi->motion_mode = SIMPLE_TRANSLATION;
3247 mbmi->mv[0].as_mv = dv;
3248 mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3249 mbmi->skip_txfm = 0;
3250 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3251 av1_num_planes(cm) - 1);
3252
3253 // TODO(aconverse@google.com): The full motion field defining discount
3254 // in MV_COST_WEIGHT is too large. Explore other values.
3255 const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3256 dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3257 const int rate_mode = x->mode_costs.intrabc_cost[1];
3258 RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3259 if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3260 &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3261 continue;
3262 rd_stats_yuv.rdcost =
3263 RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3264 if (rd_stats_yuv.rdcost < best_rd) {
3265 best_rd = rd_stats_yuv.rdcost;
3266 best_mbmi = *mbmi;
3267 best_rdstats = rd_stats_yuv;
3268 memcpy(best_blk_skip, txfm_info->blk_skip,
3269 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3270 av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3271 }
3272 }
3273 *mbmi = best_mbmi;
3274 *rd_stats = best_rdstats;
3275 memcpy(txfm_info->blk_skip, best_blk_skip,
3276 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3277 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3278 #if CONFIG_RD_DEBUG
3279 mbmi->rd_stats = *rd_stats;
3280 #endif
3281 return best_rd;
3282 }
3283
3284 // TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3285 // typedef here because Doxygen doesn't know about the typedefs yet. So using
3286 // the typedef will prevent doxygen from finding this function and generating
3287 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3288 // doxygen, we can revert back to using the typedefs.
/*!\brief Rate-distortion optimized intra mode search for one block.
 *
 * Searches luma intra modes first; when the luma winner beats \c best_rd,
 * also searches chroma intra modes. Afterwards tries intrabc and keeps
 * whichever candidate is best, writing the result into \c rd_cost and the
 * block's mode info / coding context.
 */
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Set mbmi up as a plain intra block (no intrabc, no skip mode, zero MV).
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search did not beat the incoming threshold; signal "no winner".
    rd_cost->rate = INT_MAX;
  }

  // Tighten best_rd for the intrabc search if the regular intra path won.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // rd_pick_intrabc_mode_sb updates rd_cost/mbmi in place when intrabc wins.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info into the coding context for later reuse.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3357
// Forward declaration; the definition appears later in this file. Presumably
// computes the target weighted prediction from the above/left neighbor
// predictors (used by the OBMC search paths) — see the definition for details.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride);
3362
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3363 static AOM_INLINE void rd_pick_skip_mode(
3364 RD_STATS *rd_cost, InterModeSearchState *search_state,
3365 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3366 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3367 const AV1_COMMON *const cm = &cpi->common;
3368 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3369 const int num_planes = av1_num_planes(cm);
3370 MACROBLOCKD *const xd = &x->e_mbd;
3371 MB_MODE_INFO *const mbmi = xd->mi[0];
3372
3373 x->compound_idx = 1; // COMPOUND_AVERAGE
3374 RD_STATS skip_mode_rd_stats;
3375 av1_invalid_rd_stats(&skip_mode_rd_stats);
3376
3377 if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3378 skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3379 return;
3380 }
3381
3382 const MV_REFERENCE_FRAME ref_frame =
3383 LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3384 const MV_REFERENCE_FRAME second_ref_frame =
3385 LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3386 const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3387 const THR_MODES mode_index =
3388 get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3389
3390 if (mode_index == THR_INVALID) {
3391 return;
3392 }
3393
3394 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3395 cpi->sf.inter_sf.disable_onesided_comp) &&
3396 cpi->all_one_sided_refs) {
3397 return;
3398 }
3399
3400 mbmi->mode = this_mode;
3401 mbmi->uv_mode = UV_DC_PRED;
3402 mbmi->ref_frame[0] = ref_frame;
3403 mbmi->ref_frame[1] = second_ref_frame;
3404 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3405 if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3406 MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3407 if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3408 mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3409 return;
3410 }
3411 av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3412 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3413 mbmi_ext->mode_context);
3414 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3415 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3416 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3417 }
3418
3419 assert(this_mode == NEAREST_NEARESTMV);
3420 if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3421 return;
3422 }
3423
3424 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3425 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3426 mbmi->comp_group_idx = 0;
3427 mbmi->compound_idx = x->compound_idx;
3428 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3429 mbmi->motion_mode = SIMPLE_TRANSLATION;
3430 mbmi->ref_mv_idx = 0;
3431 mbmi->skip_mode = mbmi->skip_txfm = 1;
3432 mbmi->palette_mode_info.palette_size[0] = 0;
3433 mbmi->palette_mode_info.palette_size[1] = 0;
3434
3435 set_default_interp_filters(mbmi, cm->features.interp_filter);
3436
3437 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3438 for (int i = 0; i < num_planes; i++) {
3439 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3440 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3441 }
3442
3443 BUFFER_SET orig_dst;
3444 for (int i = 0; i < num_planes; i++) {
3445 orig_dst.plane[i] = xd->plane[i].dst.buf;
3446 orig_dst.stride[i] = xd->plane[i].dst.stride;
3447 }
3448
3449 // Obtain the rdcost for skip_mode.
3450 skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3451
3452 // Compare the use of skip_mode with the best intra/inter mode obtained.
3453 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3454 int64_t best_intra_inter_mode_cost = INT64_MAX;
3455 if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3456 const ModeCosts *mode_costs = &x->mode_costs;
3457 best_intra_inter_mode_cost = RDCOST(
3458 x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3459 rd_cost->dist);
3460 // Account for non-skip mode rate in total rd stats
3461 rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3462 av1_rd_cost_update(x->rdmult, rd_cost);
3463 }
3464
3465 if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3466 (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3467 assert(mode_index != THR_INVALID);
3468 search_state->best_mbmode.skip_mode = 1;
3469 search_state->best_mbmode = *mbmi;
3470 memset(search_state->best_mbmode.inter_tx_size,
3471 search_state->best_mbmode.tx_size,
3472 sizeof(search_state->best_mbmode.inter_tx_size));
3473 set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3474 search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3475 xd);
3476 search_state->best_mode_index = mode_index;
3477
3478 // Update rd_cost
3479 rd_cost->rate = skip_mode_rd_stats.rate;
3480 rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3481 rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3482
3483 search_state->best_rd = rd_cost->rdcost;
3484 search_state->best_skip2 = 1;
3485 search_state->best_mode_skippable = 1;
3486
3487 x->txfm_search_info.skip_txfm = 1;
3488 }
3489 }
3490
3491 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3492 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3493 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3494 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3495 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3496 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3497 int mode_idx) {
3498 MB_MODE_INFO *winner_mbmi;
3499 if (multi_winner_mode_type) {
3500 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3501 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3502 winner_mbmi = &winner_mode_stat->mbmi;
3503
3504 *winner_rd_cost = &winner_mode_stat->rd_cost;
3505 *winner_rate_y = winner_mode_stat->rate_y;
3506 *winner_rate_uv = winner_mode_stat->rate_uv;
3507 *winner_mode_index = winner_mode_stat->mode_index;
3508 } else {
3509 winner_mbmi = best_mbmode;
3510 *winner_rd_cost = best_rd_cost;
3511 *winner_rate_y = best_rate_y;
3512 *winner_rate_uv = best_rate_uv;
3513 *winner_mode_index = *best_mode_index;
3514 }
3515 return winner_mbmi;
3516 }
3517
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
//
// Re-runs the transform search (y and uv) for each stored winner-mode
// candidate and, when a candidate's refined rdcost beats the current best,
// promotes it to be the block's final mode (updating rd_cost, the coding
// context, and skip flags).
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Lossless blocks are excluded: their transform choice is fixed.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Make this candidate the current block mode for re-evaluation.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          // Recursive tx partition search for the refined luma rd stats.
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          // Uniform tx size: propagate the chosen size/skip to all sub-blocks.
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // Decide between coding the residual and skipping it entirely: compare
      // the rdcost of (coeff rate + distortion) vs (skip flag rate + sse).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's old y/uv rates with the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // This candidate is the new best: commit it to the coding context.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3662
/*!\cond */
// Masks describing which prediction modes and reference-frame combinations
// the inter mode search should skip. Filled in by init_mode_skip_mask().
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3675
3676 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3677 static AOM_INLINE void disable_reference(
3678 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3679 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3680 ref_combo[ref][ref2 + 1] = true;
3681 }
3682 }
3683
3684 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3685 static AOM_INLINE void disable_inter_references_except_altref(
3686 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3687 disable_reference(LAST_FRAME, ref_combo);
3688 disable_reference(LAST2_FRAME, ref_combo);
3689 disable_reference(LAST3_FRAME, ref_combo);
3690 disable_reference(GOLDEN_FRAME, ref_combo);
3691 disable_reference(BWDREF_FRAME, ref_combo);
3692 disable_reference(ALTREF2_FRAME, ref_combo);
3693 }
3694
// Reference-frame combinations enabled when the reduced reference set is in
// use (REF_SET_REDUCED). Each entry is { first_ref, second_ref }, where the
// second entry may be NONE_FRAME for single-reference prediction.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};
3705
// Which set of reference frames / combinations the mode search may consider.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3707
// Initializes 'mask' for the given reference set: everything enabled for
// REF_SET_FULL; otherwise all reference combinations are disabled first and
// then only the chosen reduced/realtime combinations are re-enabled.
static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
                                         REF_SET ref_set) {
  if (ref_set == REF_SET_FULL) {
    // Everything available by default.
    memset(mask, 0, sizeof(*mask));
  } else {
    // All modes available by default.
    memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
    // All references disabled first.
    for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
      for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
        mask->ref_combo[ref1][ref2 + 1] = true;
      }
    }
    // Table of combinations to re-enable; left uninitialized on the default
    // (assert) path, but num_ref_combos is 0 there so it is never read.
    const MV_REFERENCE_FRAME(*ref_set_combos)[2];
    int num_ref_combos;

    // Then enable reduced set of references explicitly.
    switch (ref_set) {
      case REF_SET_REDUCED:
        ref_set_combos = reduced_ref_combos;
        num_ref_combos =
            (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
        break;
      case REF_SET_REALTIME:
        ref_set_combos = real_time_ref_combos;
        num_ref_combos =
            (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
        break;
      default: assert(0); num_ref_combos = 0;
    }

    for (int i = 0; i < num_ref_combos; ++i) {
      const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
      mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
    }
  }
}
3746
// Builds the mode/reference skip mask for the inter mode search of one block,
// combining: missing references, segment-level reference restrictions,
// SAD-based pruning of weak references, alt-ref specific restrictions, and
// speed-feature-driven pruning heuristics.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Re-disable NEAR/NEAREST when they differ from GLOBALMV (only the
      // zero-zero coincidence justified allowing them above).
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      // SAD threshold: best SAD plus 12.5%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          sf->inter_sf.alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (sf->inter_sf.alt_ref_search_fp == 1 &&
              (abs(cpi->ref_frame_dist_info
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
               1.5 * abs(cpi->ref_frame_dist_info
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      // SAD threshold: best SAD plus 50%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Intra prediction is not evaluated above the configured max intra size.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Apply the speed-feature intra luma mode mask for this block size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
}
3890
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3891 static AOM_INLINE void init_neighbor_pred_buf(
3892 const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3893 int is_hbd) {
3894 if (is_hbd) {
3895 const int len = sizeof(uint16_t);
3896 args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3897 args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3898 (MAX_SB_SQUARE >> 1) * len);
3899 args->above_pred_buf[2] =
3900 CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3901 args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3902 args->left_pred_buf[1] =
3903 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3904 args->left_pred_buf[2] =
3905 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3906 } else {
3907 args->above_pred_buf[0] = obmc_buffer->above_pred;
3908 args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3909 args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3910 args->left_pred_buf[0] = obmc_buffer->left_pred;
3911 args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3912 args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3913 }
3914 }
3915
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3916 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3917 MV_REFERENCE_FRAME ref_frame) {
3918 const AV1_COMMON *const cm = &cpi->common;
3919 MV_REFERENCE_FRAME rf[2];
3920 av1_set_ref_frame(rf, ref_frame);
3921
3922 if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3923
3924 if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3925 cm->cur_frame->ref_display_order_hint)) {
3926 return 1;
3927 }
3928
3929 return 0;
3930 }
3931
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3932 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3933 int ref_frame, int skip_ref_frame_mask) {
3934 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3935 if (!(skip_ref_frame_mask & (1 << r))) {
3936 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3937 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3938 return 1;
3939 }
3940 }
3941 }
3942 return 0;
3943 }
3944
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3945 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3946 const MB_MODE_INFO *mi_cache) {
3947 if (!mi_cache) {
3948 return 0;
3949 }
3950
3951 if (ref_frame < REF_FRAMES) {
3952 return (ref_frame == mi_cache->ref_frame[0] ||
3953 ref_frame == mi_cache->ref_frame[1]);
3954 }
3955
3956 // if we are here, then the current mode is compound.
3957 MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3958 return ref_frame == cached_ref_type;
3959 }
3960
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// Prepares all per-block state consumed by the inter RD mode search:
// OBMC neighbor prediction buffers, reference-frame signaling costs, MV
// candidate lists for single (and, when allowed, compound) references,
// neighbor-based OBMC predictions and the mode skip mask.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0] tracks past references, [1] tracks future references.
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  // Single references: reset MV context and, for available (not pruned)
  // references, set up the reference buffers and MV candidate lists.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both component references must be available for the compound type.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  int prune_obmc;
#if CONFIG_FPMT_TEST
  // In FPMT simulation mode, use the temporary (simulated) frame
  // probabilities instead of the actual ones.
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  // Build OBMC neighbor predictions only when OBMC is enabled, not pruned by
  // the probability threshold, and the block has overlappable neighbors at an
  // allowed size.
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
4095
init_single_inter_mode_search_state(InterModeSearchState * search_state)4096 static AOM_INLINE void init_single_inter_mode_search_state(
4097 InterModeSearchState *search_state) {
4098 for (int dir = 0; dir < 2; ++dir) {
4099 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4100 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4101 SingleInterModeState *state;
4102
4103 state = &search_state->single_state[dir][mode][ref_frame];
4104 state->ref_frame = NONE_FRAME;
4105 state->rd = INT64_MAX;
4106
4107 state = &search_state->single_state_modelled[dir][mode][ref_frame];
4108 state->ref_frame = NONE_FRAME;
4109 state->rd = INT64_MAX;
4110
4111 search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4112 }
4113 }
4114 }
4115
4116 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4117 search_state->best_single_rd[ref_frame] = INT64_MAX;
4118 search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4119 }
4120 av1_zero(search_state->single_state_cnt);
4121 av1_zero(search_state->single_state_modelled_cnt);
4122 }
4123
// Resets `search_state` for a fresh inter mode search on a block of size
// `bsize`: best-so-far trackers, per-mode RD thresholds derived from the
// frequency-adapted threshold tables, and the modelled/simple RD caches.
// Compound-mode state is initialized only when the frame allows compound
// references.
static AOM_INLINE void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV modes (indices up to LAST_NEW_MV_INDEX) are never pruned by
  // threshold; the remaining single-reference modes get a threshold scaled
  // by the block's frequency factor.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  // Invalidate the modelled and simple RD caches for single-reference modes.
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode thresholds and caches are only needed when the frame is
  // not restricted to single-reference prediction.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
4201
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4202 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4203 const MV_REFERENCE_FRAME *ref_frame,
4204 const PREDICTION_MODE this_mode) {
4205 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4206 return true;
4207 }
4208
4209 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4210 }
4211
// Returns 1 if the (mode, reference pair) combination is structurally
// invalid for the current block and must be skipped, 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const AV1_COMMON *const cm = &cpi->common;
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;

  if (is_comp_pred) {
    // Compound prediction requires an eligible block size and an available
    // second reference.
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;

    // Intra-only and SINGLE_REFERENCE frames cannot use compound modes.
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the prediction mode must allow it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    if (!is_interintra_allowed_bsize(bsize)) return 1;
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
  }

  return 0;
}
4244
// Collects the union of reference-frame masks already recorded for every
// mode-info unit covered by the current block within its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_size_mask = mib_size - 1;
  // Block position inside its superblock, in mode-info units.
  const int row0 = xd->mi_row & sb_size_mask;
  const int col0 = xd->mi_col & sb_size_mask;
  const int rows = mi_size_high[bsize];
  const int cols = mi_size_wide[bsize];

  int mask = 0;
  // picked_ref_frames_mask is laid out as a 32-wide grid per superblock.
  for (int r = row0; r < row0 + rows; ++r) {
    for (int c = col0; c < col0 + cols; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4263
4264 // Check if reference frame pair of the current block matches with the given
4265 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4266 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4267 const MV_REFERENCE_FRAME *ref_frames) {
4268 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4269 (ref_frames[1] == mbmi->ref_frame[1]));
4270 }
4271
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
//
// Applies all search-order-independent pruning rules for a candidate
// (mode, reference pair): skip masks, reference pruning, mode cache reuse,
// neighbor-based NEARMV pruning and the per-partition reference skip mask.
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip completely, 2 = keep single search but skip motion modes.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows index the speed-feature level, columns the qindex sub-range.
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4426
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4427 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4428 const MV_REFERENCE_FRAME *ref_frames,
4429 const AV1_COMMON *cm) {
4430 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4431 mbmi->ref_mv_idx = 0;
4432 mbmi->mode = curr_mode;
4433 mbmi->uv_mode = UV_DC_PRED;
4434 mbmi->ref_frame[0] = ref_frames[0];
4435 mbmi->ref_frame[1] = ref_frames[1];
4436 pmi->palette_size[0] = 0;
4437 pmi->palette_size[1] = 0;
4438 mbmi->filter_intra_mode_info.use_filter_intra = 0;
4439 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4440 mbmi->motion_mode = SIMPLE_TRANSLATION;
4441 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4442 set_default_interp_filters(mbmi, cm->features.interp_filter);
4443 }
4444
// Records the best simple and modelled RD achieved by the single-reference
// mode in `mbmi` (minimized over its ref-MV candidates) into the
// per-direction sorted state lists that are later used to prune compound
// mode search.
static AOM_INLINE void collect_single_states(MACROBLOCK *x,
                                             InterModeSearchState *search_state,
                                             const MB_MODE_INFO *const mbmi) {
  int i, j;
  const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
  const PREDICTION_MODE this_mode = mbmi->mode;
  // Direction 0 covers past references (<= GOLDEN_FRAME), 1 covers the rest.
  const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
  const int mode_offset = INTER_OFFSET(this_mode);
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);

  // Simple rd
  int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < simple_rd) simple_rd = rd;
  }

  // Insertion sort of single_state
  const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
  SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
  i = search_state->single_state_cnt[dir][mode_offset];
  // Shift worse entries up to keep the list sorted by ascending RD.
  for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
    state_s[j] = state_s[j - 1];
  state_s[j] = this_state_s;
  search_state->single_state_cnt[dir][mode_offset]++;

  // Modelled rd
  int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
  for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
    const int64_t rd =
        search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
    if (rd < modelled_rd) modelled_rd = rd;
  }

  // Insertion sort of single_state_modelled
  const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
  SingleInterModeState *state_m =
      search_state->single_state_modelled[dir][mode_offset];
  i = search_state->single_state_modelled_cnt[dir][mode_offset];
  for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
    state_m[j] = state_m[j - 1];
  state_m[j] = this_state_m;
  search_state->single_state_modelled_cnt[dir][mode_offset]++;
}
4490
// Post-processes the collected single-mode states: first invalidates
// reference frames whose RD is substantially worse than the best NEWMV /
// GLOBALMV RD in the same direction, then builds a merged reference ranking
// (ordered by simple RD, complemented by modelled RD) into single_rd_order
// for use by the compound-pruning logic.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune levels use a tighter (larger) pruning factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // i starts at 1: the best entry of each mode is always kept.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same pruning applied to the modelled-RD states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // Take valid references in simple-RD order first.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Fill remaining slots from the modelled-RD ordering, avoiding
      // duplicates and references pruned in the simple-RD pass.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4579
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4580 static int compound_skip_get_candidates(
4581 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4582 const int dir, const PREDICTION_MODE mode) {
4583 const int mode_offset = INTER_OFFSET(mode);
4584 const SingleInterModeState *state =
4585 search_state->single_state[dir][mode_offset];
4586 const SingleInterModeState *state_modelled =
4587 search_state->single_state_modelled[dir][mode_offset];
4588
4589 int max_candidates = 0;
4590 for (int i = 0; i < FWD_REFS; ++i) {
4591 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4592 max_candidates++;
4593 }
4594
4595 int candidates = max_candidates;
4596 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4597 candidates = AOMMIN(2, max_candidates);
4598 }
4599 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4600 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4601 state[0].ref_frame == state_modelled[0].ref_frame)
4602 candidates = 1;
4603 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4604 }
4605
4606 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4607 // Limit the number of candidates to 1 in each direction for compound
4608 // prediction
4609 candidates = AOMMIN(1, candidates);
4610 }
4611 return candidates;
4612 }
4613
// Returns 1 if the compound mode (this_mode with {ref_frame,
// second_ref_frame}) can be pruned based on the outcomes of the
// corresponding single-reference modes, 0 otherwise. A component reference
// is only used for pruning when it was actually searched and its single-mode
// MV matches the compound-mode MV for every ref-MV candidate.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Single-reference component modes of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component reference was covered by the single
  // mode search (i.e. appears in the collected state list).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEAREST/NEAR components, verify the single-mode MV equals the
  // compound-mode MV for every ref-MV candidate; otherwise the single-mode
  // result is not representative and cannot be used to prune.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune if a usable component reference does not rank among the allowed
  // top candidates for its direction.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4678
4679 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4680 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4681 const MV_REFERENCE_FRAME *ref_frames,
4682 int *const is_ref_match) {
4683 if (is_inter_block(mbmi)) {
4684 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4685 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4686 if (has_second_ref(mbmi)) {
4687 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4688 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4689 }
4690 }
4691 }
4692
4693 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4694 static INLINE int compound_skip_using_neighbor_refs(
4695 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4696 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4697 // Exclude non-extended compound modes from pruning
4698 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4699 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4700 return 0;
4701
4702 if (prune_ext_comp_using_neighbors >= 3) return 1;
4703
4704 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4705 // 1 - match for backward refs
4706 // Check if ref frames of this block matches with left neighbor.
4707 if (xd->left_available)
4708 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4709
4710 // Check if ref frames of this block matches with above neighbor.
4711 if (xd->up_available)
4712 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4713
4714 // Combine ref frame match with neighbors in forward and backward refs.
4715 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4716
4717 // Pruning based on ref frame match with neighbors.
4718 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4719 return 1;
4720 }
4721
4722 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4723 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4724 const PREDICTION_MODE this_mode,
4725 const MV_REFERENCE_FRAME ref_frame,
4726 int64_t this_rd) {
4727 if (this_rd < search_state->best_single_rd[ref_frame]) {
4728 search_state->best_single_rd[ref_frame] = this_rd;
4729 search_state->best_single_mode[ref_frame] = this_mode;
4730 }
4731 }
4732
4733 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4734 static INLINE int skip_compound_using_best_single_mode_ref(
4735 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4736 const PREDICTION_MODE *best_single_mode,
4737 int prune_comp_using_best_single_mode_ref) {
4738 // Exclude non-extended compound modes from pruning
4739 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4740 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4741 return 0;
4742
4743 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4744 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4745 // Get ref frame direction corresponding to NEWMV
4746 // 0 - NEWMV corresponding to forward direction
4747 // 1 - NEWMV corresponding to backward direction
4748 const int newmv_dir = comp_mode_ref0 != NEWMV;
4749
4750 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4751 // have NEWMV as single mode winner.
4752 // Example: For an extended-compound mode,
4753 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4754 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4755 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4756 // ALTREF_FRAME is NEWMV
4757 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4758 if (single_mode == NEWMV) return 0;
4759
4760 // Avoid pruning the compound mode when best single mode is not available
4761 if (prune_comp_using_best_single_mode_ref == 1)
4762 if (single_mode == MB_MODE_COUNT) return 0;
4763 return 1;
4764 }
4765
// qsort() comparator ordering int64_t values ascending.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Sign-of-difference without risking overflow from subtraction.
  return (lhs > rhs) - (lhs < rhs);
}
4777
// Records a newly found best mode in the overall search state: copies the RD
// stats and mbmi, updates the best rate/skip bookkeeping, and saves the
// per-4x4 txfm-skip and tx-type decisions into the pick-mode context.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Only treat the block as txfm-skipped when the new best mode is inter.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Luma rate includes the cost of signaling the skip_txfm flag.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Persist per-block txfm decisions for later reuse by the encoder.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4812
4813 // Find the best RD for a reference frame (among single reference modes)
4814 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4815 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4816 assert(ref_frame_rd[0] == INT64_MAX);
4817 int64_t ref_copy[REF_FRAMES - 1];
4818 memcpy(ref_copy, ref_frame_rd + 1,
4819 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4820 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4821
4822 int64_t cutoff = ref_copy[0];
4823 // The cut-off is within 10% of the best.
4824 if (cutoff != INT64_MAX) {
4825 assert(cutoff < INT64_MAX / 200);
4826 cutoff = (110 * cutoff) / 100;
4827 }
4828 ref_frame_rd[0] = cutoff;
4829 }
4830
4831 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4832 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4833 MV_REFERENCE_FRAME frame1,
4834 MV_REFERENCE_FRAME frame2) {
4835 assert(frame2 > 0);
4836 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4837 ref_frame_rd[frame2] <= ref_frame_rd[0];
4838 }
4839
// Re-evaluates the stored winner candidates (which were searched with simple
// translation only) through the full motion-mode RD search (motion_mode_rd),
// updating the overall search state when a candidate beats the current best.
// Only single-reference candidates are considered; *yrd receives the luma RD
// of the winning candidate when the best mode is updated.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and the rates recorded during the
    // simple-translation search.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the destination buffers so motion_mode_rd can restore them.
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction source buffers at this candidate's references.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // ret_value == INT64_MAX means the candidate was rejected by the search.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4917
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set to nonzero when motion-mode search should be skipped
  // for the current mode (written in skip_inter_mode()).
  int *skip_motion_mode;
  // Masks of prediction modes / reference frames to skip.
  mode_skip_mask_t *mode_skip_mask;
  // Overall inter mode search state (best RD so far, single-mode stats, ...).
  InterModeSearchState *search_state;
  // Mask of reference frames to skip, forwarded to the order-independent
  // skip check.
  int skip_ref_frame_mask;
  // Set once the first compound mode is reached, so single-mode statistics
  // are analyzed exactly once.
  int reach_first_comp_mode;
  // Multiplier (fixed point, MODE_THRESH_QBITS fractional bits) applied to
  // the per-mode RD threshold when the best mode so far is skippable.
  int mode_thresh_mul_fact;
  // Number of single-reference modes processed so far; compared against
  // NUM_SINGLE_REF_MODES before computing the single-ref RD cutoff.
  int num_single_modes_processed;
  // Nonzero once find_top_ref() has produced the single-ref RD cutoff.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4931
// Returns 1 if the inter mode at position midx in av1_default_mode_order
// should be skipped for the current block, 0 otherwise. Applies frame-level
// compatibility checks followed by a sequence of speed-feature pruning
// heuristics; as a side effect it may set *args->skip_motion_mode, trigger
// the one-time analysis of single-mode statistics, and compute the
// single-reference RD cutoff used for compound pruning.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: keep the mode but skip its
  // motion-mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(x, this_mode, ref_frame_type))
      return 1;
  }

  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    // Prune GOLDEN-based NEWMV/NEARMV when GOLDEN is stale and is not the
    // current best mode's reference.
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5048
// Tracks the best RD cost seen so far for each reference-mode class
// (single, compound, mode-select) so the frame-level reference mode can be
// decided later.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive the rates with and without the compound-mode signaling cost.
  int64_t single_rate, hybrid_rate;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Update the class matching the current prediction type.
  const int pred_class = comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_class])
    search_state->best_pred_rd[pred_class] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
5076
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// On return, search_state / rd_cost / ctx describe the best mode found and
// *yrd holds the luma RD of the best candidate in this partition.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates by their estimated RD so the most promising come first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the number of candidates by the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once a candidate's estimated RD is more than 25% worse than the
    // best estimate (curr_est_rd * 0.80 > top_est_rd).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Once past the threshold, allow at most one tx search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): is_predictor_built is never set to true in this
    // function, so the predictor is always rebuilt below.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma rate includes the cost of signaling the skip_txfm flag.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5260
// Number of winner simple-translation modes to retain for the later
// motion-mode search, presumably indexed by the corresponding speed-feature
// level (0 disables the feature) — confirm against the speed-feature setup.
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5263
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by ascending rd_cost and capped at
// max_winner_motion_mode_cand entries; worse candidates fall off the end.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift existing entries at and after the insertion point one slot down;
    // the last entry is dropped when the list is already full.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5306
5307 /*!\brief Search intra modes in interframes
5308 *
5309 * \ingroup intra_mode_search
5310 *
5311 * This function searches for the best intra mode when the current frame is an
5312 * interframe. This function however does *not* handle luma palette mode.
5313 * Palette mode is currently handled by \ref av1_search_palette_mode.
5314 *
5315 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
 * for the best chroma mode. Because palette mode is currently not handled
 * here, a cache of uv mode is stored in
5319 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5320 * av1_search_palette_mode.
5321 *
5322 * \param[in,out] search_state Struct keep track of the prediction mode
5323 * search state in interframe.
5324 *
5325 * \param[in] cpi Top-level encoder structure.
5326 * \param[in,out] x Pointer to struct holding all the data for
5327 * the current prediction block.
5328 * \param[out] rd_cost Stores the best rd_cost among all the
5329 * prediction modes searched.
5330 * \param[in] bsize Current block size.
 * \param[in,out]  ctx                  Structure to hold the number of 4x4 blks
 *                                      to copy the tx_type and txfm_skip arrays
 *                                      for only the Y plane.
5334 * \param[in] sf_args Stores the list of intra mode candidates
5335 * to be searched.
5336 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5337 * current ref frame is an intra frame.
5338 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5339 * terminate chroma intra mode search.
5340 *
5341 * \remark If a new best mode is found, search_state and rd_costs are updated
5342 * correspondingly. While x is also modified, it is only used as a temporary
5343 * buffer, and the final decisions are stored in search_state.
5344 */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Tracking for the best luma intra mode found below. best_mode_cost_y stays
  // -1 until a valid luma candidate beats best_rd_y.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Bail out of the whole luma search if the skip-intra speed feature
    // decided (possibly mid-loop) that intra modes should be skipped.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    // Maps mode_idx to a (prediction mode, delta angle) pair on mbmi.
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Respect the encoder configuration / speed-feature switches that disable
    // smooth and Paeth prediction modes.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Non-zero delta angles are only evaluated when angle deltas are allowed
    // for this block size and enabled in the configuration.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    // Evaluate the luma RD cost of this mode; is_luma_result_valid is 0 when
    // the helper pruned the mode without producing usable stats.
    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only accept the mode if its luma RD beats the inter-derived threshold;
    // otherwise chroma search below is skipped entirely.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        // Snapshot per-block skip flags and tx types of the winner so they
        // can be restored after the loop clobbers them.
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode beat yrd_threshold: intra cannot win, stop here.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  // Add the chroma mode signalling cost only when this block carries chroma.
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // The intra RD serves as an upper bound for every reference-mode bucket.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  // Promote this intra mode to the overall best if it beats the current best.
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5525
5526 #if !CONFIG_REALTIME_ONLY
5527 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5528 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5529 static AOM_INLINE void calculate_cost_from_tpl_data(
5530 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5531 int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5532 const AV1_COMMON *const cm = &cpi->common;
5533 // Only consider full SB.
5534 const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5535 const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5536 const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5537 (block_size_high[sb_size] / tpl_bsize_1d);
5538 SuperBlockEnc *sb_enc = &x->sb_enc;
5539 if (sb_enc->tpl_data_count == len) {
5540 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5541 const int tpl_stride = sb_enc->tpl_stride;
5542 const int tplw = mi_size_wide[tpl_bsize];
5543 const int tplh = mi_size_high[tpl_bsize];
5544 const int nw = mi_size_wide[bsize] / tplw;
5545 const int nh = mi_size_high[bsize] / tplh;
5546 if (nw >= 1 && nh >= 1) {
5547 const int of_h = mi_row % mi_size_high[sb_size];
5548 const int of_w = mi_col % mi_size_wide[sb_size];
5549 const int start = of_h / tplh * tpl_stride + of_w / tplw;
5550
5551 for (int k = 0; k < nh; k++) {
5552 for (int l = 0; l < nw; l++) {
5553 *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5554 *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5555 }
5556 }
5557 *inter_cost /= nw * nh;
5558 *intra_cost /= nw * nh;
5559 }
5560 }
5561 }
5562 #endif // !CONFIG_REALTIME_ONLY
5563
5564 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5565 // intra mode search.
static AOM_INLINE void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  // A valid second reference frame marks the best inter mode as compound.
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Real-time heuristic: skip intra when the best single-ref inter mode has
  // small motion on a large block with non-trivial source variance.
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  // The remaining heuristics require the skip_intra_in_interframe speed
  // feature and a source variance above a minimal threshold.
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    // Looser qindex threshold at higher speed levels (>= 3 uses MAXQ).
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // At the most aggressive level, also skip when TPL costs are
      // unavailable (negative sentinel values).
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Separate models for sub-480p and larger resolutions.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    // Integer ratio of max to current AC quantizer (coarse quality measure).
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
5632
get_block_temp_var(const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize)5633 static AOM_INLINE int get_block_temp_var(const AV1_COMP *cpi,
5634 const MACROBLOCK *x,
5635 BLOCK_SIZE bsize) {
5636 const AV1_COMMON *const cm = &cpi->common;
5637 const SPEED_FEATURES *const sf = &cpi->sf;
5638
5639 if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5640 !sf->rt_sf.short_circuit_low_temp_var ||
5641 !sf->rt_sf.prune_inter_modes_using_temp_var) {
5642 return 0;
5643 }
5644
5645 const int mi_row = x->e_mbd.mi_row;
5646 const int mi_col = x->e_mbd.mi_col;
5647 int is_low_temp_var = 0;
5648
5649 if (cm->seq_params->sb_size == BLOCK_64X64)
5650 is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5651 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5652 else
5653 is_low_temp_var = av1_get_force_skip_low_temp_var(
5654 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5655
5656 return is_low_temp_var;
5657 }
5658
5659 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5660 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5661 struct macroblock *x, struct RD_STATS *rd_cost,
5662 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5663 int64_t best_rd_so_far) {
5664 AV1_COMMON *const cm = &cpi->common;
5665 const FeatureFlags *const features = &cm->features;
5666 const int num_planes = av1_num_planes(cm);
5667 const SPEED_FEATURES *const sf = &cpi->sf;
5668 MACROBLOCKD *const xd = &x->e_mbd;
5669 MB_MODE_INFO *const mbmi = xd->mi[0];
5670 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5671 int i;
5672 const ModeCosts *mode_costs = &x->mode_costs;
5673 const int *comp_inter_cost =
5674 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5675
5676 InterModeSearchState search_state;
5677 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5678 INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5679 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5680 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5681 };
5682 HandleInterModeArgs args = { { NULL },
5683 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5684 { NULL },
5685 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5686 MAX_SB_SIZE >> 1 },
5687 NULL,
5688 NULL,
5689 NULL,
5690 search_state.modelled_rd,
5691 INT_MAX,
5692 INT_MAX,
5693 search_state.simple_rd,
5694 0,
5695 interintra_modes,
5696 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5697 { { 0, 0 } },
5698 0,
5699 0,
5700 -1,
5701 -1,
5702 -1,
5703 { 0 },
5704 { 0 },
5705 UINT_MAX };
5706 // Currently, is_low_temp_var is used in real time encoding.
5707 const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5708
5709 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5710 // Indicates the appropriate number of simple translation winner modes for
5711 // exhaustive motion mode evaluation
5712 const int max_winner_motion_mode_cand =
5713 num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5714 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5715 motion_mode_candidate motion_mode_cand;
5716 motion_mode_best_st_candidate best_motion_mode_cands;
5717 // Initializing the number of motion mode candidates to zero.
5718 best_motion_mode_cands.num_motion_mode_cand = 0;
5719 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5720 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5721
5722 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5723
5724 av1_invalid_rd_stats(rd_cost);
5725
5726 for (i = 0; i < REF_FRAMES; ++i) {
5727 x->warp_sample_info[i].num = -1;
5728 }
5729
5730 // Ref frames that are selected by square partition blocks.
5731 int picked_ref_frames_mask = 0;
5732 if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5733 mbmi->partition != PARTITION_NONE) {
5734 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5735 // partition blocks. prune_ref_frame_for_rect_partitions >=2
5736 // implies prune for vert, horiz and extended partition blocks.
5737 if ((mbmi->partition != PARTITION_VERT &&
5738 mbmi->partition != PARTITION_HORZ) ||
5739 sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5740 picked_ref_frames_mask =
5741 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5742 }
5743 }
5744
5745 #if CONFIG_COLLECT_COMPONENT_TIMING
5746 start_timing(cpi, set_params_rd_pick_inter_mode_time);
5747 #endif
5748 // Skip ref frames that never selected by square blocks.
5749 const int skip_ref_frame_mask =
5750 picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5751 mode_skip_mask_t mode_skip_mask;
5752 unsigned int ref_costs_single[REF_FRAMES];
5753 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5754 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5755 // init params, set frame modes, speed features
5756 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5757 skip_ref_frame_mask, ref_costs_single,
5758 ref_costs_comp, yv12_mb);
5759 #if CONFIG_COLLECT_COMPONENT_TIMING
5760 end_timing(cpi, set_params_rd_pick_inter_mode_time);
5761 #endif
5762
5763 int64_t best_est_rd = INT64_MAX;
5764 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5765 // If do_tx_search is 0, only estimated RD should be computed.
5766 // If do_tx_search is 1, all modes have TX search performed.
5767 const int do_tx_search =
5768 !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5769 (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5770 num_pels_log2_lookup[bsize] > 8));
5771 InterModesInfo *inter_modes_info = x->inter_modes_info;
5772 inter_modes_info->num = 0;
5773
5774 // Temporary buffers used by handle_inter_mode().
5775 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5776
5777 // The best RD found for the reference frame, among single reference modes.
5778 // Note that the 0-th element will contain a cut-off that is later used
5779 // to determine if we should skip a compound mode.
5780 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5781 INT64_MAX, INT64_MAX, INT64_MAX,
5782 INT64_MAX, INT64_MAX };
5783
5784 // Prepared stats used later to check if we could skip intra mode eval.
5785 int64_t inter_cost = -1;
5786 int64_t intra_cost = -1;
5787 // Need to tweak the threshold for hdres speed 0 & 1.
5788 const int mi_row = xd->mi_row;
5789 const int mi_col = xd->mi_col;
5790
5791 // Obtain the relevant tpl stats for pruning inter modes
5792 PruneInfoFromTpl inter_cost_info_from_tpl;
5793 #if !CONFIG_REALTIME_ONLY
5794 if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5795 // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5796 // prune_ref_by_selective_ref_frame()
5797 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
5798 // prune_ref_by_selective_ref_frame()
5799 // Populating valid_refs[idx] = 1 ensures that
5800 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5801 // pruned ref frame.
5802 int valid_refs[INTER_REFS_PER_FRAME];
5803 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5804 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5805 valid_refs[frame - 1] =
5806 x->tpl_keep_ref_frame[frame] ||
5807 !prune_ref_by_selective_ref_frame(
5808 cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5809 }
5810 av1_zero(inter_cost_info_from_tpl);
5811 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5812 &inter_cost_info_from_tpl);
5813 }
5814
5815 const int do_pruning =
5816 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5817 if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5818 cpi->oxcf.algo_cfg.enable_tpl_model)
5819 calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5820 &intra_cost);
5821 #endif // !CONFIG_REALTIME_ONLY
5822
5823 // Initialize best mode stats for winner mode processing.
5824 const int max_winner_mode_count =
5825 winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5826 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5827 x->winner_mode_count = 0;
5828 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5829 NULL, bsize, best_rd_so_far,
5830 sf->winner_mode_sf.multi_winner_mode_type, 0);
5831
5832 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5833 if (sf->inter_sf.prune_inter_modes_if_skippable) {
5834 // Higher multiplication factor values for lower quantizers.
5835 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5836 }
5837
5838 // Initialize arguments for mode loop speed features
5839 InterModeSFArgs sf_args = { &args.skip_motion_mode,
5840 &mode_skip_mask,
5841 &search_state,
5842 skip_ref_frame_mask,
5843 0,
5844 mode_thresh_mul_fact,
5845 0,
5846 0 };
5847 int64_t best_inter_yrd = INT64_MAX;
5848
5849 // This is the main loop of this function. It loops over all possible inter
5850 // modes and calls handle_inter_mode() to compute the RD for each.
5851 // Here midx is just an iterator index that should not be used by itself
5852 // except to keep track of the number of modes searched. It should be used
5853 // with av1_default_mode_order to get the enum that defines the mode, which
5854 // can be used with av1_mode_defs to get the prediction mode and the ref
5855 // frames.
5856 // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5857 // good speedup for real time case. If we decide to use compound mode in real
5858 // time, maybe we can modify av1_default_mode_order table.
5859 THR_MODES mode_start = THR_INTER_MODE_START;
5860 THR_MODES mode_end = THR_INTER_MODE_END;
5861 const CurrentFrame *const current_frame = &cm->current_frame;
5862 if (current_frame->reference_mode == SINGLE_REFERENCE) {
5863 mode_start = SINGLE_REF_MODE_START;
5864 mode_end = SINGLE_REF_MODE_END;
5865 }
5866
5867 for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5868 // Get the actual prediction mode we are trying in this iteration
5869 const THR_MODES mode_enum = av1_default_mode_order[midx];
5870 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5871 const PREDICTION_MODE this_mode = mode_def->mode;
5872 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5873
5874 const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5875 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5876 const int is_single_pred =
5877 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5878 const int comp_pred = second_ref_frame > INTRA_FRAME;
5879
5880 init_mbmi(mbmi, this_mode, ref_frames, cm);
5881
5882 txfm_info->skip_txfm = 0;
5883 sf_args.num_single_modes_processed += is_single_pred;
5884 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5885 #if CONFIG_COLLECT_COMPONENT_TIMING
5886 start_timing(cpi, skip_inter_mode_time);
5887 #endif
5888 // Apply speed features to decide if this inter mode can be skipped
5889 const int is_skip_inter_mode = skip_inter_mode(
5890 cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5891 #if CONFIG_COLLECT_COMPONENT_TIMING
5892 end_timing(cpi, skip_inter_mode_time);
5893 #endif
5894 if (is_skip_inter_mode) continue;
5895
5896 // Select prediction reference frames.
5897 for (i = 0; i < num_planes; i++) {
5898 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5899 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5900 }
5901
5902 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5903 mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5904 mbmi->filter_intra_mode_info.use_filter_intra = 0;
5905 mbmi->ref_mv_idx = 0;
5906
5907 const int64_t ref_best_rd = search_state.best_rd;
5908 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5909 av1_init_rd_stats(&rd_stats);
5910
5911 const int ref_frame_cost = comp_pred
5912 ? ref_costs_comp[ref_frame][second_ref_frame]
5913 : ref_costs_single[ref_frame];
5914 const int compmode_cost =
5915 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
5916 const int real_compmode_cost =
5917 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
5918 ? compmode_cost
5919 : 0;
5920 // Point to variables that are maintained between loop iterations
5921 args.single_newmv = search_state.single_newmv;
5922 args.single_newmv_rate = search_state.single_newmv_rate;
5923 args.single_newmv_valid = search_state.single_newmv_valid;
5924 args.single_comp_cost = real_compmode_cost;
5925 args.ref_frame_cost = ref_frame_cost;
5926 args.best_pred_sse = search_state.best_pred_sse;
5927
5928 int64_t skip_rd[2] = { search_state.best_skip_rd[0],
5929 search_state.best_skip_rd[1] };
5930 int64_t this_yrd = INT64_MAX;
5931 #if CONFIG_COLLECT_COMPONENT_TIMING
5932 start_timing(cpi, handle_inter_mode_time);
5933 #endif
5934 int64_t this_rd = handle_inter_mode(
5935 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
5936 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
5937 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
5938 &this_yrd);
5939 #if CONFIG_COLLECT_COMPONENT_TIMING
5940 end_timing(cpi, handle_inter_mode_time);
5941 #endif
5942 if (current_frame->reference_mode != SINGLE_REFERENCE) {
5943 if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
5944 is_inter_singleref_mode(this_mode)) {
5945 collect_single_states(x, &search_state, mbmi);
5946 }
5947
5948 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
5949 is_inter_singleref_mode(this_mode))
5950 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
5951 }
5952
5953 if (this_rd == INT64_MAX) continue;
5954
5955 if (mbmi->skip_txfm) {
5956 rd_stats_y.rate = 0;
5957 rd_stats_uv.rate = 0;
5958 }
5959
5960 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
5961 this_rd < ref_frame_rd[ref_frame]) {
5962 ref_frame_rd[ref_frame] = this_rd;
5963 }
5964
5965 // Did this mode help, i.e., is it the new best mode
5966 if (this_rd < search_state.best_rd) {
5967 assert(IMPLIES(comp_pred,
5968 cm->current_frame.reference_mode != SINGLE_REFERENCE));
5969 search_state.best_pred_sse = x->pred_sse[ref_frame];
5970 best_inter_yrd = this_yrd;
5971 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5972 &rd_stats_uv, mode_enum, x, do_tx_search);
5973 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
5974 search_state.best_skip_rd[1] = skip_rd[1];
5975 }
5976 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
5977 // Add this mode to motion mode candidate list for motion mode search
5978 // if using motion_mode_for_winner_cand speed feature
5979 handle_winner_cand(mbmi, &best_motion_mode_cands,
5980 max_winner_motion_mode_cand, this_rd,
5981 &motion_mode_cand, args.skip_motion_mode);
5982 }
5983
5984 /* keep record of best compound/single-only prediction */
5985 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
5986 x->rdmult, &search_state, compmode_cost);
5987 }
5988
5989 #if CONFIG_COLLECT_COMPONENT_TIMING
5990 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5991 #endif
5992 if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
5993 // For the single ref winner candidates, evaluate other motion modes (non
5994 // simple translation).
5995 evaluate_motion_mode_for_winner_candidates(
5996 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
5997 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
5998 &search_state, &best_inter_yrd);
5999 }
6000 #if CONFIG_COLLECT_COMPONENT_TIMING
6001 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6002 #endif
6003
6004 #if CONFIG_COLLECT_COMPONENT_TIMING
6005 start_timing(cpi, do_tx_search_time);
6006 #endif
6007 if (do_tx_search != 1) {
6008 // A full tx search has not yet been done, do tx search for
6009 // top mode candidates
6010 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6011 yv12_mb, mi_row, mi_col, &search_state,
6012 rd_cost, ctx, &best_inter_yrd);
6013 }
6014 #if CONFIG_COLLECT_COMPONENT_TIMING
6015 end_timing(cpi, do_tx_search_time);
6016 #endif
6017
6018 #if CONFIG_COLLECT_COMPONENT_TIMING
6019 start_timing(cpi, handle_intra_mode_time);
6020 #endif
6021 // Gate intra mode evaluation if best of inter is skip except when source
6022 // variance is extremely low and also based on max intra bsize.
6023 skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6024 intra_cost);
6025
6026 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6027 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6028 &sf_args, intra_ref_frame_cost,
6029 best_inter_yrd);
6030 #if CONFIG_COLLECT_COMPONENT_TIMING
6031 end_timing(cpi, handle_intra_mode_time);
6032 #endif
6033
6034 #if CONFIG_COLLECT_COMPONENT_TIMING
6035 start_timing(cpi, refine_winner_mode_tx_time);
6036 #endif
6037 int winner_mode_count =
6038 sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6039 // In effect only when fast tx search speed features are enabled.
6040 refine_winner_mode_tx(
6041 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6042 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6043 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6044 #if CONFIG_COLLECT_COMPONENT_TIMING
6045 end_timing(cpi, refine_winner_mode_tx_time);
6046 #endif
6047
6048 // Initialize default mode evaluation params
6049 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6050
6051 // Only try palette mode when the best mode so far is an intra mode.
6052 const int try_palette =
6053 cpi->oxcf.tool_cfg.enable_palette &&
6054 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6055 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6056 RD_STATS this_rd_cost;
6057 int this_skippable = 0;
6058 if (try_palette) {
6059 #if CONFIG_COLLECT_COMPONENT_TIMING
6060 start_timing(cpi, av1_search_palette_mode_time);
6061 #endif
6062 this_skippable = av1_search_palette_mode(
6063 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6064 ctx, &this_rd_cost, search_state.best_rd);
6065 #if CONFIG_COLLECT_COMPONENT_TIMING
6066 end_timing(cpi, av1_search_palette_mode_time);
6067 #endif
6068 if (this_rd_cost.rdcost < search_state.best_rd) {
6069 search_state.best_mode_index = THR_DC;
6070 mbmi->mv[0].as_int = 0;
6071 rd_cost->rate = this_rd_cost.rate;
6072 rd_cost->dist = this_rd_cost.dist;
6073 rd_cost->rdcost = this_rd_cost.rdcost;
6074 search_state.best_rd = rd_cost->rdcost;
6075 search_state.best_mbmode = *mbmi;
6076 search_state.best_skip2 = 0;
6077 search_state.best_mode_skippable = this_skippable;
6078 memcpy(ctx->blk_skip, txfm_info->blk_skip,
6079 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6080 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6081 }
6082 }
6083
6084 search_state.best_mbmode.skip_mode = 0;
6085 if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6086 is_comp_ref_allowed(bsize)) {
6087 const struct segmentation *const seg = &cm->seg;
6088 unsigned char segment_id = mbmi->segment_id;
6089 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6090 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6091 }
6092 }
6093
6094 // Make sure that the ref_mv_idx is only nonzero when we're
6095 // using a mode which can support ref_mv_idx
6096 if (search_state.best_mbmode.ref_mv_idx != 0 &&
6097 !(search_state.best_mbmode.mode == NEWMV ||
6098 search_state.best_mbmode.mode == NEW_NEWMV ||
6099 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6100 search_state.best_mbmode.ref_mv_idx = 0;
6101 }
6102
6103 if (search_state.best_mode_index == THR_INVALID ||
6104 search_state.best_rd >= best_rd_so_far) {
6105 rd_cost->rate = INT_MAX;
6106 rd_cost->rdcost = INT64_MAX;
6107 return;
6108 }
6109
6110 const InterpFilter interp_filter = features->interp_filter;
6111 assert((interp_filter == SWITCHABLE) ||
6112 (interp_filter ==
6113 search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6114 !is_inter_block(&search_state.best_mbmode));
6115 assert((interp_filter == SWITCHABLE) ||
6116 (interp_filter ==
6117 search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6118 !is_inter_block(&search_state.best_mbmode));
6119
6120 if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6121 av1_update_rd_thresh_fact(
6122 cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6123 search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6124 }
6125
6126 // macroblock modes
6127 *mbmi = search_state.best_mbmode;
6128 txfm_info->skip_txfm |= search_state.best_skip2;
6129
6130 // Note: this section is needed since the mode may have been forced to
6131 // GLOBALMV by the all-zero mode handling of ref-mv.
6132 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6133 // Correct the interp filters for GLOBALMV
6134 if (is_nontrans_global_motion(xd, xd->mi[0])) {
6135 int_interpfilters filters =
6136 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6137 assert(mbmi->interp_filters.as_int == filters.as_int);
6138 (void)filters;
6139 }
6140 }
6141
6142 txfm_info->skip_txfm |= search_state.best_mode_skippable;
6143
6144 assert(search_state.best_mode_index != THR_INVALID);
6145
6146 #if CONFIG_INTERNAL_STATS
6147 store_coding_context(x, ctx, search_state.best_mode_index,
6148 search_state.best_mode_skippable);
6149 #else
6150 store_coding_context(x, ctx, search_state.best_mode_skippable);
6151 #endif // CONFIG_INTERNAL_STATS
6152
6153 if (mbmi->palette_mode_info.palette_size[1] > 0) {
6154 assert(try_palette);
6155 av1_restore_uv_color_map(cpi, x);
6156 }
6157 }
6158
// Mode decision for a block whose segment has the SEG_LVL_SKIP feature
// active.  Such blocks are forced to single-reference GLOBALMV with no coded
// residual, so only the rate of the side information (interpolation filter,
// reference mode and reference frame signalling) is estimated here; this
// path books zero distortion.  On failure to beat best_rd_so_far,
// rd_cost->rate/rdcost are set to INT_MAX/INT64_MAX.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // skip segments always code a single reference
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;  // no residual is measured on this path
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  // Rate tables for signalling the reference-frame choice below.
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  // Invalidate cached per-reference statistics.
  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // This path is only valid when the segment forces skip.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the prescribed coding choices for a skip segment: no palette, no
  // filter-intra, GLOBALMV with simple translation and DC chroma prediction.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // The reference frame may itself be pinned by the segment; otherwise
  // default to LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector is fully determined by the reference's global motion.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;  // no residual/transform coding

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  // Refresh the warped-motion sample count even though motion_mode stays
  // SIMPLE_TRANSLATION; presumably consumed by later syntax coding — confirm
  // with callers.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With zero distortion booked, the best switchable filter is simply the
    // one that is cheapest to signal in this context.
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): this uses LAST_FRAME's cost even when SEG_LVL_REF_FRAME
  // pinned a different reference above — confirm whether that is intended.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Report failure if the forced mode cannot beat the best RD seen so far.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  // Feed this GLOBALMV decision back into the adaptive mode-pruning
  // thresholds.
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

  // Record the coding context for this block.
#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6290
/*!\cond */
// Context shared with the per-neighbour OBMC weighting callbacks
// (calc_target_weighted_pred_above / calc_target_weighted_pred_left),
// passed through foreach_overlappable_nb_* as an opaque pointer.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // holds the wsrc/mask accumulation buffers
  const uint8_t *tmp;             // neighbouring (above or left) prediction
  int tmp_stride;                 // stride of 'tmp' in samples
  int overlap;                    // extent of the overlap region in pixels
};
/*!\endcond */
6299
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6300 static INLINE void calc_target_weighted_pred_above(
6301 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6302 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6303 (void)nb_mi;
6304 (void)num_planes;
6305 (void)rel_mi_row;
6306 (void)dir;
6307
6308 struct calc_target_weighted_pred_ctxt *ctxt =
6309 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6310
6311 const int bw = xd->width << MI_SIZE_LOG2;
6312 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6313
6314 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6315 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6316 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6317 const int is_hbd = is_cur_buf_hbd(xd);
6318
6319 if (!is_hbd) {
6320 for (int row = 0; row < ctxt->overlap; ++row) {
6321 const uint8_t m0 = mask1d[row];
6322 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6323 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6324 wsrc[col] = m1 * tmp[col];
6325 mask[col] = m0;
6326 }
6327 wsrc += bw;
6328 mask += bw;
6329 tmp += ctxt->tmp_stride;
6330 }
6331 } else {
6332 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6333
6334 for (int row = 0; row < ctxt->overlap; ++row) {
6335 const uint8_t m0 = mask1d[row];
6336 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6337 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6338 wsrc[col] = m1 * tmp16[col];
6339 mask[col] = m0;
6340 }
6341 wsrc += bw;
6342 mask += bw;
6343 tmp16 += ctxt->tmp_stride;
6344 }
6345 }
6346 }
6347
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6348 static INLINE void calc_target_weighted_pred_left(
6349 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6350 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6351 (void)nb_mi;
6352 (void)num_planes;
6353 (void)rel_mi_col;
6354 (void)dir;
6355
6356 struct calc_target_weighted_pred_ctxt *ctxt =
6357 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6358
6359 const int bw = xd->width << MI_SIZE_LOG2;
6360 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6361
6362 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6363 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6364 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6365 const int is_hbd = is_cur_buf_hbd(xd);
6366
6367 if (!is_hbd) {
6368 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6369 for (int col = 0; col < ctxt->overlap; ++col) {
6370 const uint8_t m0 = mask1d[col];
6371 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6372 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6373 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6374 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6375 }
6376 wsrc += bw;
6377 mask += bw;
6378 tmp += ctxt->tmp_stride;
6379 }
6380 } else {
6381 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6382
6383 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6384 for (int col = 0; col < ctxt->overlap; ++col) {
6385 const uint8_t m0 = mask1d[col];
6386 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6387 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6388 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6389 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6390 }
6391 wsrc += bw;
6392 mask += bw;
6393 tmp16 += ctxt->tmp_stride;
6394 }
6395 }
6396 }
6397
6398 // This function has a structure similar to av1_build_obmc_inter_prediction
6399 //
6400 // The OBMC predictor is computed as:
6401 //
6402 // PObmc(x,y) =
6403 // AOM_BLEND_A64(Mh(x),
6404 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6405 // PLeft(x, y))
6406 //
6407 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6408 // rounding, this can be written as:
6409 //
6410 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6411 // Mh(x) * Mv(y) * P(x,y) +
6412 // Mh(x) * Cv(y) * Pabove(x,y) +
6413 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6414 //
6415 // Where :
6416 //
6417 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6419 //
6420 // This function computes 'wsrc' and 'mask' as:
6421 //
6422 // wsrc(x, y) =
6423 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//    Mh(x) * Cv(y) * Pabove(x,y) -
6425 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6426 //
6427 // mask(x, y) = Mh(x) * Mv(y)
6428 //
6429 // These can then be used to efficiently approximate the error for any
6430 // predictor P in the context of the provided neighbouring predictors by
6431 // computing:
6432 //
6433 // error(x, y) =
6434 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6435 //
// Fills x->obmc_buffer's 'wsrc' and 'mask' arrays for the current block from
// the reconstructed 'above'/'left' neighbouring predictors (with strides
// 'above_stride'/'left_stride'), so that the OBMC error of any candidate
// predictor P can be approximated as
//   error(x, y) = wsrc(x, y) - mask(x, y) * P(x, y)
// at a scale of AOM_BLEND_A64_MAX_ALPHA ** 2.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int bw = xd->width << MI_SIZE_LOG2;   // block width in pixels
  const int bh = xd->height << MI_SIZE_LOG2;  // block height in pixels
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
  int32_t *mask_buf = obmc_buffer->mask;
  int32_t *wsrc_buf = obmc_buffer->wsrc;

  const int is_hbd = is_cur_buf_hbd(xd);
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // plane 0 should not be sub-sampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Start from "no neighbour": zero weighted source, full weight for the
  // current block's own predictor.
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    // Overlap is half the block height, capped at half of 64 pixels.
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                   above_stride, overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Scale up by AOM_BLEND_A64_MAX_ALPHA: the left pass below shifts the
  // accumulated values right by AOM_BLEND_A64_ROUND_BITS, so this keeps a
  // consistent fixed-point scale across both passes.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    // Overlap is half the block width, capped at half of 64 pixels.
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
                                                   left_stride, overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Final pass: wsrc = src * AOM_BLEND_A64_MAX_ALPHA^2 minus the accumulated
  // neighbour contribution, in low- and high-bitdepth variants.
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}
6506