• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44 
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73 
74 #define LAST_NEW_MV_INDEX 6
75 
// Mode-threshold multiplication factor table used by
// prune_inter_modes_if_skippable. Values are in Q12 fixed-point format and
// were derived from the equation:
//   (2.5 - ((float)x->qindex / MAXQ) * 1.5)
// i.e. the factor shrinks linearly from 2.5 (qindex 0) to 1.0 (qindex MAXQ).
#define MODE_THRESH_QBITS 12
// Indexed by qindex in [0, QINDEX_RANGE).
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106 
// Default ordering of the prediction modes considered during mode search.
// Layout: single-reference modes grouped by mode type (NEAREST, NEW, NEAR,
// GLOBAL across all reference frames), then compound NEAREST_NEAREST modes
// for each reference pair, then the remaining compound modes one reference
// pair at a time, and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV, one entry per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST for every reference pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped by reference pair (LAST+BWDREF).
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // LAST+ALTREF.
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // LAST2+ALTREF.
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // LAST3+ALTREF.
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // GOLDEN+ALTREF.
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // LAST2+BWDREF.
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // LAST3+BWDREF.
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // GOLDEN+BWDREF.
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // LAST+ALTREF2.
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // LAST2+ALTREF2.
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // LAST3+ALTREF2.
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // GOLDEN+ALTREF2.
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // LAST+LAST2.
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // LAST+LAST3.
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // LAST+GOLDEN.
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // BWDREF+ALTREF.
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299 
/*!\cond */
// One single-ref mode-search result: the rd score recorded for a given
// reference frame, plus a validity flag.
typedef struct SingleInterModeState {
  int64_t rd;
  MV_REFERENCE_FRAME ref_frame;
  int valid;
} SingleInterModeState;

// Running state for the inter mode search: best-so-far results, per-ref
// statistics, and caches of intermediate single-mode evaluations.
typedef struct InterModeSearchState {
  int64_t best_rd;
  int64_t best_skip_rd[2];
  MB_MODE_INFO best_mbmode;
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  // Per-mode rd thresholds indexed by THR_MODES.
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;
  RD_STATS best_y_rdcost;
} InterModeSearchState;
/*!\endcond */
348 
// Reset every per-block-size inter-mode rd model in the tile to an empty,
// unfitted state so a fresh round of statistics can be accumulated.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361 
// Estimate the residue rate and distortion for a block from its sse using
// the fitted linear rd model for this block size.
// Returns 1 and fills the outputs when the model is ready; returns 0 (outputs
// untouched) otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  if (sse < md->dist_mean) {
    // Below the mean distortion the model predicts zero residue cost.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  if (fabs(est_ld) < 1e-2) {
    // Near-zero slope would blow up the rate estimate; clamp at INT_MAX / 2.
    // TODO(angiebird@google.com): find better solution than clamping.
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - md->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  // A non-positive rate estimate means the model adds no information; fall
  // back to treating the block as all-distortion.
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395 
// Fit (or refit) the per-block-size linear rd models from the statistics
// accumulated by inter_mode_data_push, then restart accumulation.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Require 200 samples for the initial fit, 64 for each refit.
    if (md->num < (md->ready ? 64 : 200)) continue;

    if (!md->ready) {
      // First fit: means come straight from the accumulated sums.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Refit: blend new window means with the old ones, weighting the old
      // estimate 3:1.
      const double factor = 3;
      md->dist_mean =
          (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
      md->ld_mean =
          (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
      md->sse_mean =
          (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
          (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
          (factor + 1);
    }

    // Least-squares slope/intercept of ld as a linear function of sse.
    const double my = md->ld_mean;
    const double mx = md->sse_mean;
    const double dx = sqrt(md->sse_sse_mean);
    const double dxy = md->sse_ld_mean;

    md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    md->b = my - md->a * mx;
    md->ready = 1;

    // Start a fresh accumulation window for the next refit.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445 
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447                                             BLOCK_SIZE bsize, int64_t sse,
448                                             int64_t dist, int residue_cost) {
449   if (residue_cost == 0 || sse == dist) return;
450   const int block_idx = inter_mode_data_block_idx(bsize);
451   if (block_idx == -1) return;
452   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454     const double ld = (sse - dist) * 1. / residue_cost;
455     ++rd_model->num;
456     rd_model->dist_sum += dist;
457     rd_model->ld_sum += ld;
458     rd_model->sse_sum += sse;
459     rd_model->sse_sse_sum += (double)sse * (double)sse;
460     rd_model->sse_ld_sum += sse * ld;
461   }
462 }
463 
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465                                              int mode_rate, int64_t sse,
466                                              int64_t rd, RD_STATS *rd_cost,
467                                              RD_STATS *rd_cost_y,
468                                              RD_STATS *rd_cost_uv,
469                                              const MB_MODE_INFO *mbmi) {
470   const int num = inter_modes_info->num;
471   assert(num < MAX_INTER_MODES);
472   inter_modes_info->mbmi_arr[num] = *mbmi;
473   inter_modes_info->mode_rate_arr[num] = mode_rate;
474   inter_modes_info->sse_arr[num] = sse;
475   inter_modes_info->est_rd_arr[num] = rd;
476   inter_modes_info->rd_cost_arr[num] = *rd_cost;
477   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479   ++inter_modes_info->num;
480 }
481 
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484     // To avoid inconsistency in qsort() ordering when two elements are equal,
485     // using idx as tie breaker. Refer aomedia:2928
486     if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487       return 0;
488     else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489       return 1;
490     else
491       return -1;
492   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493     return 1;
494   } else {
495     return -1;
496   }
497 }
498 
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500     const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501   if (inter_modes_info->num == 0) {
502     return;
503   }
504   for (int i = 0; i < inter_modes_info->num; ++i) {
505     rd_idx_pair_arr[i].idx = i;
506     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507   }
508   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509         compare_rd_idx_pair);
510 }
511 
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)514 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515                                        int width, int height, float *hcorr,
516                                        float *vcorr) {
517   // The following notation is used:
518   // x - current pixel
519   // y - left neighbor pixel
520   // z - top neighbor pixel
521   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524 
525   // First, process horizontal correlation on just the first row
526   x_sum += diff[0];
527   x2_sum += diff[0] * diff[0];
528   x_firstrow += diff[0];
529   x2_firstrow += diff[0] * diff[0];
530   for (int j = 1; j < width; ++j) {
531     const int16_t x = diff[j];
532     const int16_t y = diff[j - 1];
533     x_sum += x;
534     x_firstrow += x;
535     x2_sum += x * x;
536     x2_firstrow += x * x;
537     xy_sum += x * y;
538   }
539 
540   // Process vertical correlation in the first column
541   x_firstcol += diff[0];
542   x2_firstcol += diff[0] * diff[0];
543   for (int i = 1; i < height; ++i) {
544     const int16_t x = diff[i * stride];
545     const int16_t z = diff[(i - 1) * stride];
546     x_sum += x;
547     x_firstcol += x;
548     x2_sum += x * x;
549     x2_firstcol += x * x;
550     xz_sum += x * z;
551   }
552 
553   // Now process horiz and vert correlation through the rest unit
554   for (int i = 1; i < height; ++i) {
555     for (int j = 1; j < width; ++j) {
556       const int16_t x = diff[i * stride + j];
557       const int16_t y = diff[i * stride + j - 1];
558       const int16_t z = diff[(i - 1) * stride + j];
559       x_sum += x;
560       x2_sum += x * x;
561       xy_sum += x * y;
562       xz_sum += x * z;
563     }
564   }
565 
566   for (int j = 0; j < width; ++j) {
567     x_finalrow += diff[(height - 1) * stride + j];
568     x2_finalrow +=
569         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570   }
571   for (int i = 0; i < height; ++i) {
572     x_finalcol += diff[i * stride + width - 1];
573     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574   }
575 
576   int64_t xhor_sum = x_sum - x_finalcol;
577   int64_t xver_sum = x_sum - x_finalrow;
578   int64_t y_sum = x_sum - x_firstcol;
579   int64_t z_sum = x_sum - x_firstrow;
580   int64_t x2hor_sum = x2_sum - x2_finalcol;
581   int64_t x2ver_sum = x2_sum - x2_finalrow;
582   int64_t y2_sum = x2_sum - x2_firstcol;
583   int64_t z2_sum = x2_sum - x2_firstrow;
584 
585   const float num_hor = (float)(height * (width - 1));
586   const float num_ver = (float)((height - 1) * width);
587 
588   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590 
591   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593 
594   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596 
597   if (xhor_var_n > 0 && y_var_n > 0) {
598     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599     *hcorr = *hcorr < 0 ? 0 : *hcorr;
600   } else {
601     *hcorr = 1.0;
602   }
603   if (xver_var_n > 0 && z_var_n > 0) {
604     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605     *vcorr = *vcorr < 0 ? 0 : *vcorr;
606   } else {
607     *vcorr = 1.0;
608   }
609 }
610 
// Sum the source-vs-reconstruction sse over all coded planes of the current
// block; optionally report the luma-only sse through *sse_y.
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
                       int64_t *sse_y) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  int64_t total_sse = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    // Chroma planes contribute only when this block carries chroma info.
    if (plane != 0 && !xd->is_chroma_ref) break;
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    unsigned int plane_sse;
    cpi->ppi->fn_ptr[plane_bsize].vf(p->src.buf, p->src.stride, pd->dst.buf,
                                     pd->dst.stride, &plane_sse);
    total_sse += plane_sse;
    if (plane == 0 && sse_y != NULL) *sse_y = plane_sse;
  }
  // NOTE(review): scaled by 16 — presumably to match the precision used by
  // the rd cost computation; confirm against callers.
  total_sse <<= 4;
  return total_sse;
}
634 
// Sum of squared differences between original and dequantized coefficients.
// Also returns the sum of squared original coefficients through *ssz.
// NOTE(review): per-term products are computed in int — assumes coefficient
// magnitudes are bounded so the squares fit (the highbd variant widens to
// int64_t explicitly).
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0;
  int64_t sqcoeff = 0;
  for (intptr_t i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }
  *ssz = sqcoeff;
  return error;
}
649 
// Low-precision block error: sum of squared differences between original
// and dequantized 16-bit coefficients (no ssz output).
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t total = 0;
  for (intptr_t i = 0; i < block_size; i++) {
    const int d = coeff[i] - dqcoeff[i];
    total += d * d;
  }
  return total;
}
661 
662 #if CONFIG_AV1_HIGHBITDEPTH
// High-bit-depth block error: like av1_block_error_c, but products are
// widened to int64_t and the results are scaled back down by 2*(bd-8) bits
// with rounding so values are comparable across bit depths.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0;
  int64_t sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
683 #endif
684 
// Returns 1 (skip) for a fine-angle diagonal intra mode unless the best
// intra mode found so far is one of the two directional modes adjacent to
// it; returns 0 for all other modes.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701 
// Rate cost of signalling an inter prediction mode. Compound modes use a
// single table lookup; single-ref modes accumulate the cost of each binary
// decision down the mode tree (NEWMV -> GLOBALMV -> NEAREST/NEAR).
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Decision 1: NEWMV vs. the rest.
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];
  int cost = mode_costs->newmv_mode_cost[ctx][1];

  // Decision 2: GLOBALMV vs. the ref-mv modes.
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];
  cost += mode_costs->zeromv_mode_cost[ctx][1];

  // Decision 3: NEARESTMV vs. NEARMV.
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
  return cost;
}
732 
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734                                               int ref_idx) {
735   return ref_idx ? compound_ref1_mode(this_mode)
736                  : compound_ref0_mode(this_mode);
737 }
738 
estimate_ref_frame_costs(const AV1_COMMON * cm,const MACROBLOCKD * xd,const ModeCosts * mode_costs,int segment_id,unsigned int * ref_costs_single,unsigned int (* ref_costs_comp)[REF_FRAMES])739 static AOM_INLINE void estimate_ref_frame_costs(
740     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
741     int segment_id, unsigned int *ref_costs_single,
742     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
743   int seg_ref_active =
744       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
745   if (seg_ref_active) {
746     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
747     int ref_frame;
748     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
749       memset(ref_costs_comp[ref_frame], 0,
750              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
751   } else {
752     int intra_inter_ctx = av1_get_intra_inter_context(xd);
753     ref_costs_single[INTRA_FRAME] =
754         mode_costs->intra_inter_cost[intra_inter_ctx][0];
755     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
756 
757     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
758       ref_costs_single[i] = base_cost;
759 
760     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
761     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
762     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
763     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
764     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
765     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
766 
767     // Determine cost of a single ref frame, where frame types are represented
768     // by a tree:
769     // Level 0: add cost whether this ref is a forward or backward ref
770     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
773     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
774     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
775     ref_costs_single[ALTREF2_FRAME] +=
776         mode_costs->single_ref_cost[ctx_p1][0][1];
777     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
778 
779     // Level 1: if this ref is forward ref,
780     // add cost whether it is last/last2 or last3/golden
781     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
782     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
783     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
784     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
785 
786     // Level 1: if this ref is backward ref
787     // then add cost whether this ref is altref or backward ref
788     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
789     ref_costs_single[ALTREF2_FRAME] +=
790         mode_costs->single_ref_cost[ctx_p2][1][0];
791     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
792 
793     // Level 2: further add cost whether this ref is last or last2
794     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
795     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
796 
797     // Level 2: last3 or golden
798     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
799     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
800 
801     // Level 2: bwdref or altref2
802     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
803     ref_costs_single[ALTREF2_FRAME] +=
804         mode_costs->single_ref_cost[ctx_p6][5][1];
805 
806     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
807       // Similar to single ref, determine cost of compound ref frames.
808       // cost_compound_refs = cost_first_ref + cost_second_ref
809       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
810       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
811       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
812       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
813       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
814 
815       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
816       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
817 
818       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
819           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
820               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
821       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
822       ref_bicomp_costs[ALTREF_FRAME] = 0;
823 
824       // cost of first ref frame
825       ref_bicomp_costs[LAST_FRAME] +=
826           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827       ref_bicomp_costs[LAST2_FRAME] +=
828           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
829       ref_bicomp_costs[LAST3_FRAME] +=
830           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831       ref_bicomp_costs[GOLDEN_FRAME] +=
832           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
833 
834       ref_bicomp_costs[LAST_FRAME] +=
835           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
836       ref_bicomp_costs[LAST2_FRAME] +=
837           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
838 
839       ref_bicomp_costs[LAST3_FRAME] +=
840           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
841       ref_bicomp_costs[GOLDEN_FRAME] +=
842           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
843 
844       // cost of second ref frame
845       ref_bicomp_costs[BWDREF_FRAME] +=
846           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847       ref_bicomp_costs[ALTREF2_FRAME] +=
848           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
849       ref_bicomp_costs[ALTREF_FRAME] +=
850           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
851 
852       ref_bicomp_costs[BWDREF_FRAME] +=
853           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
854       ref_bicomp_costs[ALTREF2_FRAME] +=
855           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
856 
857       // cost: if one ref frame is forward ref, the other ref is backward ref
858       int ref0, ref1;
859       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
860         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
861           ref_costs_comp[ref0][ref1] =
862               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
863         }
864       }
865 
866       // cost: if both ref frames are the same side.
867       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
868       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
869       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
870       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
871           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
872           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
873           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
874       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
875           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
876           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
877           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
878           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
879       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
880           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
881           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
882           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
883           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
884       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
885           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
886           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
887     } else {
888       int ref0, ref1;
889       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
890         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
891           ref_costs_comp[ref0][ref1] = 512;
892       }
893       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
894       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
895       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
896       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
897     }
898   }
899 }
900 
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int skippable)901 static AOM_INLINE void store_coding_context(
902 #if CONFIG_INTERNAL_STATS
903     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
904 #else
905     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
906 #endif  // CONFIG_INTERNAL_STATS
907     int skippable) {
908   MACROBLOCKD *const xd = &x->e_mbd;
909 
910   // Take a snapshot of the coding context so it can be
911   // restored if we decide to encode this way
912   ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
913   ctx->skippable = skippable;
914 #if CONFIG_INTERNAL_STATS
915   ctx->best_mode_index = mode_index;
916 #endif  // CONFIG_INTERNAL_STATS
917   ctx->mic = *xd->mi[0];
918   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
919                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
920 }
921 
// Sets up the prediction buffers for 'ref_frame' in yv12_mb and gathers the
// candidate motion vectors for it: neighbour/global candidates via
// av1_find_mv_refs(), followed by an encoder-side ranking via av1_mv_pred().
static AOM_INLINE void setup_buffer_ref_mvs_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  // Non-NULL when the encoder holds a pre-scaled copy of this reference.
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
  assert(yv12 != NULL);

  if (scaled_ref_frame) {
    // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
    // support scaling.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
                         num_planes);
  } else {
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the center point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
              ref_frame, block_size);

  // Go back to unscaled reference.
  if (scaled_ref_frame) {
    // We had temporarily setup pred block based on scaled reference above. Go
    // back to unscaled reference now, for subsequent use.
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
  }
}
966 
// Borders for MV clamping, in 1/8-pel units (<< 3): the frame border minus
// the room needed by the interpolation filter.
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)

// TODO(jingning): this mv clamping function should be block size dependent.
// Clamps *mv so the prediction stays inside the extended frame border.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  // NOTE(review): positional initializer — assumed to match SubpelMvLimits
  // field order {col_min, col_max, row_min, row_max}; confirm in mv.h.
  const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
                                     xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
                                     xd->mb_to_top_edge - LEFT_TOP_MARGIN,
                                     xd->mb_to_bottom_edge +
                                         RIGHT_BOTTOM_MARGIN };
  clamp_mv(mv, &mv_limits);
}
979 
980 /* If the current mode shares the same mv with other modes with higher cost,
981  * skip this mode. */
skip_repeated_mv(const AV1_COMMON * const cm,const MACROBLOCK * const x,PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frames[2],InterModeSearchState * search_state)982 static int skip_repeated_mv(const AV1_COMMON *const cm,
983                             const MACROBLOCK *const x,
984                             PREDICTION_MODE this_mode,
985                             const MV_REFERENCE_FRAME ref_frames[2],
986                             InterModeSearchState *search_state) {
987   const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
988   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
989   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
990   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
991   PREDICTION_MODE compare_mode = MB_MODE_COUNT;
992   if (!is_comp_pred) {
993     if (this_mode == NEARMV) {
994       if (ref_mv_count == 0) {
995         // NEARMV has the same motion vector as NEARESTMV
996         compare_mode = NEARESTMV;
997       }
998       if (ref_mv_count == 1 &&
999           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1000         // NEARMV has the same motion vector as GLOBALMV
1001         compare_mode = GLOBALMV;
1002       }
1003     }
1004     if (this_mode == GLOBALMV) {
1005       if (ref_mv_count == 0 &&
1006           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1007         // GLOBALMV has the same motion vector as NEARESTMV
1008         compare_mode = NEARESTMV;
1009       }
1010       if (ref_mv_count == 1) {
1011         // GLOBALMV has the same motion vector as NEARMV
1012         compare_mode = NEARMV;
1013       }
1014     }
1015 
1016     if (compare_mode != MB_MODE_COUNT) {
1017       // Use modelled_rd to check whether compare mode was searched
1018       if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1019           INT64_MAX) {
1020         const int16_t mode_ctx =
1021             av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1022         const int compare_cost =
1023             cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1024         const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1025 
1026         // Only skip if the mode cost is larger than compare mode cost
1027         if (this_cost > compare_cost) {
1028           search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1029               search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1030           return 1;
1031         }
1032       }
1033     }
1034   }
1035   return 0;
1036 }
1037 
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039                                      const AV1_COMMON *cm,
1040                                      const MACROBLOCK *x) {
1041   const MACROBLOCKD *const xd = &x->e_mbd;
1042   *out_mv = in_mv;
1043   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044                      cm->features.cur_frame_force_integer_mv);
1045   clamp_mv2(&out_mv->as_mv, xd);
1046   return av1_is_fullmv_in_range(&x->mv_limits,
1047                                 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049 
// To use single newmv directly for compound modes, need to clamp the mv to the
// valid mv range. Without this, encoder would generate out of range mv, and
// this is seen in 8k encoding.
// Clamps *mv into the subpel search range derived from the full-pel limits
// and the reference MV selected for ref_idx.
static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
                                     int ref_idx) {
  const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
  SubpelMvLimits mv_limits;

  av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
  clamp_mv(&mv->as_mv, &mv_limits);
}
1061 
// Handles the motion search for NEWMV-style modes.
// Compound modes reuse the cached single-reference NEWMV results stored in
// 'args' (clamped into range) and recompute only the MV rate; single-ref
// modes run av1_single_motion_search() and cache the result back into 'args'.
// Returns 0 on success, or INT64_MAX if this mode should be skipped (the
// search produced an invalid MV, or this ref_mv_idx is marked as skipped in
// mode_info).
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // refs[1] falls back to 0 when there is no second reference.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Whether a cached single-ref NEWMV exists for each reference.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: seed each from the cached single-ref result and
      // re-derive the total MV signaling rate.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second MV is NEW.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first MV is NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      // Speed feature: bound the search range using the previously searched
      // ref_mv candidate whose reference MV is closest to the current one.
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev distance between the current and previous reference MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when a close match (< 16 pels, in 1/8-pel
      // units) with a valid cached NEWMV result exists.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1166 
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1167 static INLINE void update_mode_start_end_index(
1168     const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1169     int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1170     int interintra_allowed, int eval_motion_mode) {
1171   *mode_index_start = (int)SIMPLE_TRANSLATION;
1172   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1173   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1174     if (!eval_motion_mode) {
1175       *mode_index_end = (int)SIMPLE_TRANSLATION;
1176     } else {
1177       // Set the start index appropriately to process motion modes other than
1178       // simple translation
1179       *mode_index_start = 1;
1180     }
1181   }
1182   if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1183     *mode_index_end = SIMPLE_TRANSLATION;
1184 }
1185 
1186 /*!\brief AV1 motion mode search
1187  *
1188  * \ingroup inter_mode_search
1189  * Function to search over and determine the motion mode. It will update
1190  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1191  * WARPED_CAUSAL and determine any necessary side information for the selected
1192  * motion mode. It will also perform the full transform search, unless the
1193  * input parameter do_tx_search indicates to do an estimation of the RD rather
1194  * than an RD corresponding to a full transform search. It will return the
1195  * RD for the final motion_mode.
1196  * Do the RD search for a given inter mode and compute all information relevant
1197  * to the input mode. It will compute the best MV,
1198  * compound parameters (if the mode is a compound mode) and interpolation filter
1199  * parameters.
1200  *
1201  * \param[in]     cpi               Top-level encoder structure.
1202  * \param[in]     tile_data         Pointer to struct holding adaptive
1203  *                                  data/contexts/models for the tile during
1204  *                                  encoding.
1205  * \param[in]     x                 Pointer to struct holding all the data for
1206  *                                  the current macroblock.
1207  * \param[in]     bsize             Current block size.
1208  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1209  *                                  information.
1210  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1211  *                                  for only the Y plane.
1212  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1213  *                                  for only the UV planes.
1214  * \param[in]     args              HandleInterModeArgs struct holding
1215  *                                  miscellaneous arguments for inter mode
1216  *                                  search. See the documentation for this
1217  *                                  struct for a description of each member.
1218  * \param[in]     ref_best_rd       Best RD found so far for this block.
1219  *                                  It is used for early termination of this
1220  *                                  search if the RD exceeds this value.
1221  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1222  *                                  best total RD for a skip mode so far, and
1223  *                                  skip_rd[1] is the best RD for a skip mode so
1224  *                                  far in luma. This is used as a speed feature
1225  *                                  to skip the transform search if the computed
1226  *                                  skip RD for the current mode is not better
1227  *                                  than the best skip_rd so far.
1228  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1229  *                                  This will be modified if a motion search is
1230  *                                  done in the motion mode search.
1231  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1232  *                                  prediction. This will eventually hold the
1233  *                                  final prediction, and the tmp_dst info will
1234  *                                  be copied here.
1235  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1236  *                                  do_tx_search (see below) is 0.
1237  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1238  *                                  a full transform search. This will compute
1239  *                                  an estimated RD for the modes without the
1240  *                                  transform search and later perform the full
1241  *                                  transform search on the best candidates.
1242  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1243  *                                  information to perform a full transform
1244  *                                  search only on winning candidates searched
1245  *                                  with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate motion
 *                                  modes other than SIMPLE_TRANSLATION.
1248  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1249  *                                  the luma plane.
1250  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1251  * current motion mode being tested should be skipped. It returns 0 if the
1252  * motion mode search is a success.
1253  */
motion_mode_rd(const AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * const x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * const args,int64_t ref_best_rd,int64_t * ref_skip_rd,int * rate_mv,const BUFFER_SET * orig_dst,int64_t * best_est_rd,int do_tx_search,InterModesInfo * inter_modes_info,int eval_motion_mode,int64_t * yrd)1254 static int64_t motion_mode_rd(
1255     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1256     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1257     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1258     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1259     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1260     int eval_motion_mode, int64_t *yrd) {
1261   const AV1_COMMON *const cm = &cpi->common;
1262   const FeatureFlags *const features = &cm->features;
1263   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1264   const int num_planes = av1_num_planes(cm);
1265   MACROBLOCKD *xd = &x->e_mbd;
1266   MB_MODE_INFO *mbmi = xd->mi[0];
1267   const int is_comp_pred = has_second_ref(mbmi);
1268   const PREDICTION_MODE this_mode = mbmi->mode;
1269   const int rate2_nocoeff = rd_stats->rate;
1270   int best_xskip_txfm = 0;
1271   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1272   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1273   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1274   const int rate_mv0 = *rate_mv;
1275   const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1276                                  is_interintra_allowed(mbmi) &&
1277                                  mbmi->compound_idx;
1278   WARP_SAMPLE_INFO *const warp_sample_info =
1279       &x->warp_sample_info[mbmi->ref_frame[0]];
1280   int *pts0 = warp_sample_info->pts;
1281   int *pts_inref0 = warp_sample_info->pts_inref;
1282 
1283   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1284   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1285   av1_invalid_rd_stats(&best_rd_stats);
1286   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1287   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1288   *yrd = INT64_MAX;
1289   if (features->switchable_motion_mode) {
1290     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1291     // is allowed.
1292     last_motion_mode_allowed = motion_mode_allowed(
1293         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1294   }
1295 
1296   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1297     // Collect projection samples used in least squares approximation of
1298     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1299     if (warp_sample_info->num < 0) {
1300       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1301     }
1302     mbmi->num_proj_ref = warp_sample_info->num;
1303   }
1304   const int total_samples = mbmi->num_proj_ref;
1305   if (total_samples == 0) {
1306     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1307     // warped parameters.
1308     last_motion_mode_allowed = OBMC_CAUSAL;
1309   }
1310 
1311   const MB_MODE_INFO base_mbmi = *mbmi;
1312   MB_MODE_INFO best_mbmi;
1313   const int interp_filter = features->interp_filter;
1314   const int switchable_rate =
1315       av1_is_interp_needed(xd)
1316           ? av1_get_switchable_rate(x, xd, interp_filter,
1317                                     cm->seq_params->enable_dual_filter)
1318           : 0;
1319   int64_t best_rd = INT64_MAX;
1320   int best_rate_mv = rate_mv0;
1321   const int mi_row = xd->mi_row;
1322   const int mi_col = xd->mi_col;
1323   int mode_index_start, mode_index_end;
1324   // Modify the start and end index according to speed features. For example,
1325   // if SIMPLE_TRANSLATION has already been searched according to
1326   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1327   // to avoid searching it again.
1328   update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1329                               last_motion_mode_allowed, interintra_allowed,
1330                               eval_motion_mode);
1331   // Main function loop. This loops over all of the possible motion modes and
1332   // computes RD to determine the best one. This process includes computing
1333   // any necessary side information for the motion mode and performing the
1334   // transform search.
1335   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1336        mode_index++) {
1337     if (args->skip_motion_mode && mode_index) continue;
1338     int tmp_rate2 = rate2_nocoeff;
1339     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1340     int tmp_rate_mv = rate_mv0;
1341 
1342     *mbmi = base_mbmi;
1343     if (is_interintra_mode) {
1344       // Only use SIMPLE_TRANSLATION for interintra
1345       mbmi->motion_mode = SIMPLE_TRANSLATION;
1346     } else {
1347       mbmi->motion_mode = (MOTION_MODE)mode_index;
1348       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1349     }
1350 
1351     // Do not search OBMC if the probability of selecting it is below a
1352     // predetermined threshold for this update_type and block size.
1353     const FRAME_UPDATE_TYPE update_type =
1354         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1355     int use_actual_frame_probs = 1;
1356     int prune_obmc;
1357 #if CONFIG_FPMT_TEST
1358     use_actual_frame_probs =
1359         (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1360     if (!use_actual_frame_probs) {
1361       prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1362                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1363     }
1364 #endif
1365     if (use_actual_frame_probs) {
1366       prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1367                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1368     }
1369     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1370         mbmi->motion_mode == OBMC_CAUSAL)
1371       continue;
1372 
1373     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1374       // SIMPLE_TRANSLATION mode: no need to recalculate.
1375       // The prediction is calculated before motion_mode_rd() is called in
1376       // handle_inter_mode()
1377     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1378       const uint32_t cur_mv = mbmi->mv[0].as_int;
1379       // OBMC_CAUSAL not allowed for compound prediction
1380       assert(!is_comp_pred);
1381       if (have_newmv_in_inter_mode(this_mode)) {
1382         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1383                                  &mbmi->mv[0], NULL);
1384         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1385       }
1386       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1387         // Build the predictor according to the current motion vector if it has
1388         // not already been built
1389         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1390                                       0, av1_num_planes(cm) - 1);
1391       }
1392       // Build the inter predictor by blending the predictor corresponding to
1393       // this MV, and the neighboring blocks using the OBMC model
1394       av1_build_obmc_inter_prediction(
1395           cm, xd, args->above_pred_buf, args->above_pred_stride,
1396           args->left_pred_buf, args->left_pred_stride);
1397 #if !CONFIG_REALTIME_ONLY
1398     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1399       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1400       mbmi->motion_mode = WARPED_CAUSAL;
1401       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1402       mbmi->interp_filters =
1403           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1404 
1405       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1406       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1407       // Select the samples according to motion vector difference
1408       if (mbmi->num_proj_ref > 1) {
1409         mbmi->num_proj_ref = av1_selectSamples(
1410             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1411       }
1412 
1413       // Compute the warped motion parameters with a least squares fit
1414       //  using the collected samples
1415       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1416                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1417                                &mbmi->wm_params, mi_row, mi_col)) {
1418         assert(!is_comp_pred);
1419         if (have_newmv_in_inter_mode(this_mode)) {
1420           // Refine MV for NEWMV mode
1421           const int_mv mv0 = mbmi->mv[0];
1422           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1423           const int num_proj_ref0 = mbmi->num_proj_ref;
1424 
1425           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1426           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1427           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1428                                             &ref_mv.as_mv, NULL);
1429 
1430           // Refine MV in a small range.
1431           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1432                                total_samples);
1433 
1434           if (mv0.as_int != mbmi->mv[0].as_int) {
1435             // Keep the refined MV and WM parameters.
1436             tmp_rate_mv = av1_mv_bit_cost(
1437                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1438                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1439             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1440           } else {
1441             // Restore the old MV and WM parameters.
1442             mbmi->mv[0] = mv0;
1443             mbmi->wm_params = wm_params0;
1444             mbmi->num_proj_ref = num_proj_ref0;
1445           }
1446         }
1447 
1448         // Build the warped predictor
1449         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1450                                       av1_num_planes(cm) - 1);
1451       } else {
1452         continue;
1453       }
1454 #endif  // !CONFIG_REALTIME_ONLY
1455     } else if (is_interintra_mode) {
1456       const int ret =
1457           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1458                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1459       if (ret < 0) continue;
1460     }
1461 
1462     // If we are searching newmv and the mv is the same as refmv, skip the
1463     // current mode
1464     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1465 
1466     // Update rd_stats for the current motion mode
1467     txfm_info->skip_txfm = 0;
1468     rd_stats->dist = 0;
1469     rd_stats->sse = 0;
1470     rd_stats->skip_txfm = 1;
1471     rd_stats->rate = tmp_rate2;
1472     const ModeCosts *mode_costs = &x->mode_costs;
1473     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1474     if (interintra_allowed) {
1475       rd_stats->rate +=
1476           mode_costs->interintra_cost[size_group_lookup[bsize]]
1477                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1478     }
1479     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1480         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1481       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1482         rd_stats->rate +=
1483             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1484       } else {
1485         rd_stats->rate +=
1486             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1487       }
1488     }
1489 
1490     int64_t this_yrd = INT64_MAX;
1491 
1492     if (!do_tx_search) {
1493       // Avoid doing a transform search here to speed up the overall mode
1494       // search. It will be done later in the mode search if the current
1495       // motion mode seems promising.
1496       int64_t curr_sse = -1;
1497       int64_t sse_y = -1;
1498       int est_residue_cost = 0;
1499       int64_t est_dist = 0;
1500       int64_t est_rd = 0;
1501       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1502         curr_sse = get_sse(cpi, x, &sse_y);
1503         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1504                                                  &est_residue_cost, &est_dist);
1505         (void)has_est_rd;
1506         assert(has_est_rd);
1507       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1508                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1509         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1510             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1511             NULL, &curr_sse, NULL, NULL, NULL);
1512         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1513       }
1514       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1515       if (est_rd * 0.80 > *best_est_rd) {
1516         mbmi->ref_frame[1] = ref_frame_1;
1517         continue;
1518       }
1519       const int mode_rate = rd_stats->rate;
1520       rd_stats->rate += est_residue_cost;
1521       rd_stats->dist = est_dist;
1522       rd_stats->rdcost = est_rd;
1523       if (rd_stats->rdcost < *best_est_rd) {
1524         *best_est_rd = rd_stats->rdcost;
1525         assert(sse_y >= 0);
1526         ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
1527                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1528                              : INT64_MAX;
1529       }
1530       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1531         if (!is_comp_pred) {
1532           assert(curr_sse >= 0);
1533           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1534                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1535                                 rd_stats_uv, mbmi);
1536         }
1537       } else {
1538         assert(curr_sse >= 0);
1539         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1540                               rd_stats->rdcost, rd_stats, rd_stats_y,
1541                               rd_stats_uv, mbmi);
1542       }
1543       mbmi->skip_txfm = 0;
1544     } else {
1545       // Perform full transform search
1546       int64_t skip_rd = INT64_MAX;
1547       int64_t skip_rdy = INT64_MAX;
1548       if (cpi->sf.inter_sf.txfm_rd_gate_level) {
1549         // Check if the mode is good enough based on skip RD
1550         int64_t sse_y = INT64_MAX;
1551         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1552         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1553         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1554         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1555                                         cpi->sf.inter_sf.txfm_rd_gate_level, 0);
1556         if (!eval_txfm) continue;
1557       }
1558 
1559       // Do transform search
1560       const int mode_rate = rd_stats->rate;
1561       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1562                            rd_stats->rate, ref_best_rd)) {
1563         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1564           return INT64_MAX;
1565         }
1566         continue;
1567       }
1568       const int skip_ctx = av1_get_skip_txfm_context(xd);
1569       const int y_rate =
1570           rd_stats->skip_txfm
1571               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1572               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1573       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1574 
1575       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1576       if (curr_rd < ref_best_rd) {
1577         ref_best_rd = curr_rd;
1578         ref_skip_rd[0] = skip_rd;
1579         ref_skip_rd[1] = skip_rdy;
1580       }
1581       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1582         inter_mode_data_push(
1583             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1584             rd_stats_y->rate + rd_stats_uv->rate +
1585                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1586       }
1587     }
1588 
1589     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1590       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1591         mbmi->interp_filters =
1592             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1593       }
1594     }
1595 
1596     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1597     if (mode_index == 0) {
1598       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1599     }
1600     if (mode_index == 0 || tmp_rd < best_rd) {
1601       // Update best_rd data if this is the best motion mode so far
1602       best_mbmi = *mbmi;
1603       best_rd = tmp_rd;
1604       best_rd_stats = *rd_stats;
1605       best_rd_stats_y = *rd_stats_y;
1606       best_rate_mv = tmp_rate_mv;
1607       *yrd = this_yrd;
1608       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1609       memcpy(best_blk_skip, txfm_info->blk_skip,
1610              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1611       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1612       best_xskip_txfm = mbmi->skip_txfm;
1613     }
1614   }
1615   // Update RD and mbmi stats for selected motion mode
1616   mbmi->ref_frame[1] = ref_frame_1;
1617   *rate_mv = best_rate_mv;
1618   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1619     av1_invalid_rd_stats(rd_stats);
1620     restore_dst_buf(xd, *orig_dst, num_planes);
1621     return INT64_MAX;
1622   }
1623   *mbmi = best_mbmi;
1624   *rd_stats = best_rd_stats;
1625   *rd_stats_y = best_rd_stats_y;
1626   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1627   memcpy(txfm_info->blk_skip, best_blk_skip,
1628          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1629   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1630   txfm_info->skip_txfm = best_xskip_txfm;
1631 
1632   restore_dst_buf(xd, *orig_dst, num_planes);
1633   return 0;
1634 }
1635 
// Computes the RD cost of coding this block with skip mode: builds the inter
// predictor for all planes, accumulates the (scaled) prediction SSE as the
// distortion, and fills rd_stats with the resulting rate/distortion numbers.
// Restores the original destination buffers before returning. Returns 0.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;

  // Build the prediction for every plane at the current mi position.
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, av1_num_planes(cm) - 1);

  int64_t sse_sum = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    const int plane_w = block_size_wide[plane_bsize];
    const int plane_h = block_size_high[plane_bsize];

    av1_subtract_plane(x, plane_bsize, plane);
    // Scale the SSE up by 16 (<< 4), then normalize for high bit depth so the
    // distortion is comparable with the values used elsewhere in the search.
    int64_t plane_sse =
        aom_sum_squares_2d_i16(p->src_diff, plane_w, plane_w, plane_h) << 4;
    plane_sse >>= ((cpi->frame_info.bit_depth - 8) * 2);
    sse_sum += plane_sse;
  }

  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->dist = sse_sum;
  rd_stats->sse = sse_sum;
  // Rate is just the cost of signalling skip mode in this context.
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1670 
1671 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1672 // mode
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1673 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1674                                       int ref_idx,
1675                                       const MV_REFERENCE_FRAME *ref_frame,
1676                                       PREDICTION_MODE single_mode) {
1677   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1678   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1679   assert(single_mode != NEWMV);
1680   if (single_mode == NEARESTMV) {
1681     return 0;
1682   } else if (single_mode == NEARMV) {
1683     // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1684     // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1685     if (ref_mv_count < 2) return 1;
1686   } else if (single_mode == GLOBALMV) {
1687     // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1688     if (ref_mv_count == 0) return 1;
1689     // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1690     else if (ref_mv_count == 1)
1691       return 0;
1692 
1693     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1694     // Check GLOBALMV is matching with any mv in ref_mv_stack
1695     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1696       int_mv this_mv;
1697 
1698       if (ref_idx == 0)
1699         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1700       else
1701         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1702 
1703       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1704         return 1;
1705     }
1706   }
1707   return 0;
1708 }
1709 
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1710 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1711                               int ref_idx, int ref_mv_idx,
1712                               int skip_repeated_ref_mv,
1713                               const MV_REFERENCE_FRAME *ref_frame,
1714                               const MB_MODE_INFO_EXT *mbmi_ext) {
1715   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1716   assert(is_inter_singleref_mode(single_mode));
1717   if (single_mode == NEWMV) {
1718     this_mv->as_int = INVALID_MV;
1719   } else if (single_mode == GLOBALMV) {
1720     if (skip_repeated_ref_mv &&
1721         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1722       return 0;
1723     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1724   } else {
1725     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1726     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1727     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1728     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1729       assert(ref_mv_offset >= 0);
1730       if (ref_idx == 0) {
1731         *this_mv =
1732             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1733       } else {
1734         *this_mv =
1735             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1736       }
1737     } else {
1738       if (skip_repeated_ref_mv &&
1739           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1740         return 0;
1741       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1742     }
1743   }
1744   return 1;
1745 }
1746 
1747 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1748 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type)1749 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1750     const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1751     const int8_t ref_frame_type) {
1752   if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1753 
1754   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1755   const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1756   const int ref_mv_count =
1757       AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1758 
1759   if (ref_mv_count == 0) return 0;
1760   // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1761   if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1762 
1763   // Count number of ref mvs populated from nearest candidates
1764   int nearest_refmv_count = 0;
1765   for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1766     if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1767   }
1768 
1769   // nearest_refmv_count indicates the closeness of block motion characteristics
1770   // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1771   // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1772   // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1773   // mode since these modes work well for blocks that shares similar motion
1774   // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1775   // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1776   // mode is pruned if none of the ref mvs are populated from nearest candidate.
1777   const int prune_thresh = 1 + (ref_mv_count >= 2);
1778   if (nearest_refmv_count < prune_thresh) return 1;
1779   return 0;
1780 }
1781 
// This function updates the non-new MVs for the current prediction mode:
// for each active reference it resolves the mode's MV via get_this_mv() and
// stores the (clamped) result in cur_mv[i]. NEWMV components instead receive
// the ref mv stack entry selected by mbmi->ref_mv_idx, to be refined later by
// motion search. Returns 0 when the mode should be skipped (duplicate ref mv
// or an MV rejected by clamp_and_check_mv), non-zero otherwise.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  // One iteration per active reference: 1 for single, 2 for compound.
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    // ret is 0 when this candidate duplicates another mode's MV.
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // NEWMV: seed with the DRL-selected stack entry (this_mv for ref 0,
      // comp_mv for ref 1); the search refines it afterwards.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Non-NEWMV: clamp to the allowed range; ret becomes 0 if the MV is
      // rejected.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
1811 
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1812 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1813                                const MB_MODE_INFO_EXT *mbmi_ext,
1814                                const int (*const drl_mode_cost0)[2],
1815                                int8_t ref_frame_type) {
1816   int cost = 0;
1817   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1818     for (int idx = 0; idx < 2; ++idx) {
1819       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1820         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1821         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1822         if (mbmi->ref_mv_idx == idx) return cost;
1823       }
1824     }
1825     return cost;
1826   }
1827 
1828   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1829     for (int idx = 1; idx < 3; ++idx) {
1830       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1831         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1832         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1833         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1834       }
1835     }
1836     return cost;
1837   }
1838   return cost;
1839 }
1840 
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1841 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1842                                         const MB_MODE_INFO *const mbmi,
1843                                         PREDICTION_MODE this_mode) {
1844   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1845     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1846     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1847     if (single_mode == NEWMV &&
1848         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1849       return 0;
1850     }
1851   }
1852   return 1;
1853 }
1854 
// Returns how many ref_mv_idx values are worth searching for `mode` given the
// depth of the candidate list for this reference-frame combination. Returns 1
// when the list is too short for DRL signalling to apply.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);

  // DRL only applies when the candidate list is deep enough for the mode
  // class: NEARMV-style modes need > 2 entries, pure NEWMV modes need > 1.
  const int has_drl =
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
  if (!has_drl) return 1;

  return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
}
1870 
1871 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1872 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1873                                          const int qindex,
1874                                          const int ref_mv_idx) {
1875   if (reduce_inter_modes >= 3) return 1;
1876   // Q-index logic based pruning is enabled only for
1877   // reduce_inter_modes = 2.
1878   assert(reduce_inter_modes == 2);
1879   // When reduce_inter_modes=2, pruning happens as below based on q index.
1880   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1881   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1882   // For q index range between 171 and 255: no pruning.
1883   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1884   return (ref_mv_idx >= min_prune_ref_mv_idx);
1885 }
1886 
// Whether this reference motion vector can be skipped, based on initial
// heuristics. Returns true to skip ref_mv_idx for the current mode.
// Side effect: sets mbmi->ref_mv_idx = ref_mv_idx so that the DRL cost below
// (and the caller's subsequent evaluation) refer to this candidate.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  // Weight-based pruning only applies to non-zero DRL indices.
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // For the less important LAST2/LAST3 references, skip candidates whose
    // ref mv weight falls below the "nearest" category threshold.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      // NEARMV-style modes consume stack entry 0 implicitly, so offset by 1.
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      // For NEWMV on references other than the temporally nearest past /
      // future ones, additionally gate on qindex-based pruning.
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  // Commit the candidate index before costing it.
  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound NEWMV components are invalid if their single-reference search
  // failed earlier.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Lower bound on the mode's rate: reference signalling plus DRL cost.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  // Skip when even the zero-distortion RD of this rate exceeds the best so
  // far; NEAREST(_NEAREST)MV are exempt from this early-out.
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1941 
1942 // Compute the estimated RD cost for the motion vector with simple translation.
simple_translation_pred_rd(AV1_COMP * const cpi,MACROBLOCK * x,RD_STATS * rd_stats,HandleInterModeArgs * args,int ref_mv_idx,int64_t ref_best_rd,BLOCK_SIZE bsize)1943 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1944                                           RD_STATS *rd_stats,
1945                                           HandleInterModeArgs *args,
1946                                           int ref_mv_idx, int64_t ref_best_rd,
1947                                           BLOCK_SIZE bsize) {
1948   MACROBLOCKD *xd = &x->e_mbd;
1949   MB_MODE_INFO *mbmi = xd->mi[0];
1950   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1951   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1952   const AV1_COMMON *cm = &cpi->common;
1953   const int is_comp_pred = has_second_ref(mbmi);
1954   const ModeCosts *mode_costs = &x->mode_costs;
1955 
1956   struct macroblockd_plane *p = xd->plane;
1957   const BUFFER_SET orig_dst = {
1958     { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1959     { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1960   };
1961   av1_init_rd_stats(rd_stats);
1962 
1963   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1964   mbmi->comp_group_idx = 0;
1965   mbmi->compound_idx = 1;
1966   if (mbmi->ref_frame[1] == INTRA_FRAME) {
1967     mbmi->ref_frame[1] = NONE_FRAME;
1968   }
1969   int16_t mode_ctx =
1970       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1971 
1972   mbmi->num_proj_ref = 0;
1973   mbmi->motion_mode = SIMPLE_TRANSLATION;
1974   mbmi->ref_mv_idx = ref_mv_idx;
1975 
1976   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1977   const int drl_cost =
1978       get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
1979   rd_stats->rate += drl_cost;
1980 
1981   int_mv cur_mv[2];
1982   if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
1983     return INT64_MAX;
1984   }
1985   assert(have_nearmv_in_inter_mode(mbmi->mode));
1986   for (int i = 0; i < is_comp_pred + 1; ++i) {
1987     mbmi->mv[i].as_int = cur_mv[i].as_int;
1988   }
1989   const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
1990   rd_stats->rate += ref_mv_cost;
1991 
1992   if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
1993     return INT64_MAX;
1994   }
1995 
1996   mbmi->motion_mode = SIMPLE_TRANSLATION;
1997   mbmi->num_proj_ref = 0;
1998   if (is_comp_pred) {
1999     // Only compound_average
2000     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2001     mbmi->comp_group_idx = 0;
2002     mbmi->compound_idx = 1;
2003   }
2004   set_default_interp_filters(mbmi, cm->features.interp_filter);
2005 
2006   const int mi_row = xd->mi_row;
2007   const int mi_col = xd->mi_col;
2008   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2009                                 AOM_PLANE_Y, AOM_PLANE_Y);
2010   int est_rate;
2011   int64_t est_dist;
2012   model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2013                                   NULL, NULL, NULL, NULL, NULL);
2014   return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2015 }
2016 
2017 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2018 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2019 // it is included.
mask_set_bit(int * mask,int index)2020 static INLINE void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2021 
mask_check_bit(int mask,int index)2022 static INLINE bool mask_check_bit(int mask, int index) {
2023   return (mask >> index) & 0x1;
2024 }
2025 
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns an integer where, if the i-th bit is set, it means that the i-th
// motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  // Note: ref_mv_idx_early_breakout() also sets mbmi->ref_mv_idx as a side
  // effect for each surviving candidate.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  // num_pels_log2 <= 6 means blocks of 64 pixels or fewer are not pruned.
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // Entries stay INT64_MAX for indices that were ruled out above or whose
  // simple-translation estimate fails.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction tolerates a wider margin (5%) than single (0.1%).
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2097 
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost (rate and distortion combined) for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2127 
/*!\cond */
typedef struct motion_mode_best_st_candidate {
  // Winning candidates from the SIMPLE_TRANSLATION search; other motion
  // modes are evaluated only on these entries (see motion_mode_candidate).
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2133 
2134 // Checks if the current reference frame matches with neighbouring block's
2135 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2136 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2137                                                    MB_MODE_INFO *nb_mbmi) {
2138   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2139                                           nb_mbmi->ref_frame[1] };
2140   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2141                                            cur_mbmi->ref_frame[1] };
2142   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2143   int match_found = 0;
2144 
2145   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2146     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2147         (cur_ref_frames[i] == nb_ref_frames[1]))
2148       match_found = 1;
2149   }
2150   return match_found;
2151 }
2152 
find_ref_match_in_above_nbs(const int total_mi_cols,MACROBLOCKD * xd)2153 static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
2154                                                   MACROBLOCKD *xd) {
2155   if (!xd->up_available) return 1;
2156   const int mi_col = xd->mi_col;
2157   MB_MODE_INFO **cur_mbmi = xd->mi;
2158   // prev_row_mi points into the mi array, starting at the beginning of the
2159   // previous row.
2160   MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2161   const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2162   uint8_t mi_step;
2163   for (int above_mi_col = mi_col; above_mi_col < end_col;
2164        above_mi_col += mi_step) {
2165     MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2166     mi_step = mi_size_wide[above_mi[0]->bsize];
2167     int match_found = 0;
2168     if (is_inter_block(*above_mi))
2169       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2170     if (match_found) return 1;
2171   }
2172   return 0;
2173 }
2174 
find_ref_match_in_left_nbs(const int total_mi_rows,MACROBLOCKD * xd)2175 static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
2176                                                  MACROBLOCKD *xd) {
2177   if (!xd->left_available) return 1;
2178   const int mi_row = xd->mi_row;
2179   MB_MODE_INFO **cur_mbmi = xd->mi;
2180   // prev_col_mi points into the mi array, starting at the top of the
2181   // previous column
2182   MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2183   const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2184   uint8_t mi_step;
2185   for (int left_mi_row = mi_row; left_mi_row < end_row;
2186        left_mi_row += mi_step) {
2187     MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2188     mi_step = mi_size_high[left_mi[0]->bsize];
2189     int match_found = 0;
2190     if (is_inter_block(*left_mi))
2191       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2192     if (match_found) return 1;
2193   }
2194   return 0;
2195 }
2196 /*!\endcond */
2197 
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame.
   * NOTE(review): consumers index this with (ref_frame - 1) — see
   * prune_modes_based_on_tpl_stats.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2211 
#if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates the per-reference TPL inter costs over the current block and
// records the minimum (best) inter cost among the valid reference frames in
// inter_cost_info_from_tpl. Returns early, leaving the struct untouched, if
// TPL stats are not ready for this frame.
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // TPL stats are stored at a coarser granularity; step is the block size of
  // one stats entry, in mi units.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column positions are mapped with coded_to_superres_mi(); presumably the
  // TPL stats are laid out in the superres-upscaled coordinate space — TODO
  // confirm against the TPL model code.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
#endif  // !CONFIG_REALTIME_ONLY
2268 
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2269 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2270     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2271     const PREDICTION_MODE this_mode, int prune_mode_level) {
2272   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2273   if ((prune_mode_level < 2) && have_newmv) return 0;
2274 
2275   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2276   if (best_inter_cost == INT64_MAX) return 0;
2277 
2278   const int prune_level = prune_mode_level - 1;
2279   int64_t cur_inter_cost;
2280 
2281   const int is_globalmv =
2282       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2283   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2284 
2285   // Thresholds used for pruning:
2286   // Lower value indicates aggressive pruning and higher value indicates
2287   // conservative pruning which is set based on ref_mv_idx and speed feature.
2288   // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2289   // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2290   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2291     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2292   };
2293 
2294   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2295   if (!is_comp_pred) {
2296     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2297   } else {
2298     const int64_t inter_cost_ref0 =
2299         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2300     const int64_t inter_cost_ref1 =
2301         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2302     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2303     // more aggressive pruning
2304     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2305   }
2306 
2307   // Prune the mode if cur_inter_cost is greater than threshold times
2308   // best_inter_cost
2309   if (cur_inter_cost >
2310       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2311         best_inter_cost) >>
2312        2))
2313     return 1;
2314   return 0;
2315 }
2316 
/*!\brief High level function to select parameters for compound mode.
 *
 * \ingroup inter_mode_search
 * The main search functionality is done in the call to av1_compound_type_rd().
 *
 * \param[in]     cpi               Top-level encoder structure.
 * \param[in]     x                 Pointer to struct holding all the data for
 *                                  the current macroblock.
 * \param[in]     args              HandleInterModeArgs struct holding
 *                                  miscellaneous arguments for inter mode
 *                                  search. See the documentation for this
 *                                  struct for a description of each member.
 * \param[in]     ref_best_rd       Best RD found so far for this block.
 *                                  It is used for early termination of this
 *                                  search if the RD exceeds this value.
 * \param[in,out] cur_mv            Current motion vector.
 * \param[in]     bsize             Current block size.
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
                                    compound mode.
 * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
 *                                  allocated buffers for the compound
 *                                  predictors and masks in the compound type
 *                                  search.
 * \param[in,out] orig_dst          A prediction buffer to hold a computed
 *                                  prediction. This will eventually hold the
 *                                  final prediction, and the tmp_dst info will
 *                                  be copied here.
 * \param[in]     tmp_dst           A temporary prediction buffer to hold a
 *                                  computed prediction.
 * \param[in,out] rate_mv           The rate associated with the motion vectors.
 *                                  This will be modified if a motion search is
 *                                  done in the motion mode search.
 * \param[in,out] rd_stats          Struct to keep track of the overall RD
 *                                  information.
 * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
 *                                  best total RD for a skip mode so far, and
 *                                  skip_rd[1] is the best RD for a skip mode so
 *                                  far in luma. This is used as a speed feature
 *                                  to skip the transform search if the computed
 *                                  skip RD for the current mode is not better
 *                                  than the best skip_rd so far.
 * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
 *                                  predictor. If this is 0, the inter predictor
 *                                  has already been built and thus we can avoid
 *                                  repeating computation.
 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
 * a viable candidate.
 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound types are only searched when the block size and sequence
  // header allow them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Bitmask of the compound types to evaluate in av1_compound_type_rd().
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Early termination: if the (scaled) compound RD already exceeds the best
  // RD seen so far, restore the original dst buffers and reject the mode.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2419 
2420 // Speed feature to prune out MVs that are similar to previous MVs if they
2421 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2422 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2423                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2424                                    MB_MODE_INFO *mbmi, int pruning_factor) {
2425   int i;
2426   const int is_comp_pred = has_second_ref(mbmi);
2427   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2428 
2429   // Skip the evaluation if an MV match is found.
2430   if (ref_mv_idx > 0) {
2431     for (int idx = 0; idx < ref_mv_idx; ++idx) {
2432       if (save_mv[idx][0].as_int == INVALID_MV) continue;
2433 
2434       int mv_diff = 0;
2435       for (i = 0; i < 1 + is_comp_pred; ++i) {
2436         mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2437                    abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2438       }
2439 
2440       // If this mode is not the best one, and current MV is similar to
2441       // previous stored MV, terminate this ref_mv_idx evaluation.
2442       if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2443     }
2444   }
2445 
2446   if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2447     for (i = 0; i < is_comp_pred + 1; ++i)
2448       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2449   }
2450 
2451   return 0;
2452 }
2453 
2454 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2455  *
2456  * \ingroup inter_mode_search
2457  *
2458  * Compares the sse of zero mv and the best sse found in single new_mv. If the
2459  * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2460  * Else returns 0.
2461  *
2462  * Note that the sse of here comes from single_motion_search. So it is
2463  * interpolated with the filter in motion search, not the actual interpolation
2464  * filter used in encoding.
2465  *
2466  * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2467  * \param[in]     x                 Pointer to struct holding all the data for
2468  *                                  the current macroblock.
2469  * \param[in]     bsize             The current block_size.
2470  * \param[in]     args              The args to handle_inter_mode, used to track
2471  *                                  the best SSE.
2472  * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2473  *                                       prune_zero_mv_with_sse value
2474  * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2475  */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args,int prune_zero_mv_with_sse)2476 static AOM_INLINE int prune_zero_mv_with_sse(
2477     const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2478     const HandleInterModeArgs *args, int prune_zero_mv_with_sse) {
2479   const MACROBLOCKD *xd = &x->e_mbd;
2480   const MB_MODE_INFO *mbmi = xd->mi[0];
2481 
2482   const int is_comp_pred = has_second_ref(mbmi);
2483   const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2484 
2485   // Check that the global mv is the same as ZEROMV
2486   assert(mbmi->mv[0].as_int == 0);
2487   assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
2488   assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
2489          xd->global_motion[refs[0]].wmtype == IDENTITY);
2490 
2491   // Don't prune if we have invalid data
2492   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2493     assert(mbmi->mv[0].as_int == 0);
2494     if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2495       return 0;
2496     }
2497   }
2498 
2499   // Sum up the sse of ZEROMV and best NEWMV
2500   unsigned int this_sse_sum = 0;
2501   unsigned int best_sse_sum = 0;
2502   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2503     const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2504     const struct macroblockd_plane *pd = xd->plane;
2505     const struct buf_2d *src_buf = &p->src;
2506     const struct buf_2d *ref_buf = &pd->pre[idx];
2507     const uint8_t *src = src_buf->buf;
2508     const uint8_t *ref = ref_buf->buf;
2509     const int src_stride = src_buf->stride;
2510     const int ref_stride = ref_buf->stride;
2511 
2512     unsigned int this_sse;
2513     fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2514     this_sse_sum += this_sse;
2515 
2516     const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2517     best_sse_sum += best_sse;
2518   }
2519 
2520   const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2521   if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2522     return 1;
2523   }
2524 
2525   return 0;
2526 }
2527 
/*!\brief Searches for interpolation filter in realtime mode during winner eval
 *
 * \ingroup inter_mode_search
 *
 * Does a simple interpolation filter search during winner mode evaluation. This
 * is currently only used by realtime mode as \ref
 * av1_interpolation_filter_search is not called during realtime encoding.
 *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
 *
 * \param[in]     cpi               Pointer to the compressor. Used for feature
 *                                  flags.
 * \param[in,out] x                 Pointer to macroblock. This is primarily
 *                                  used to access the buffers.
 * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
 * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
 * \param[in]     bsize             The current block_size.
 * \return Returns true if a predictor is built in xd->dst, false otherwise.
 */
static AOM_INLINE bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          int mi_row, int mi_col,
                                          BLOCK_SIZE bsize) {
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // If no subpel interpolation is needed, the filter choice is irrelevant;
  // skip the search without building a predictor.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Skip the filter not used at this resolution (see function comment).
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this filter and model its RD cost.
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      // Keep the best predictor in dst_bufs[1] by swapping buffers.
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    // Copy the winning luma predictor from the temporary buffer back into
    // the original dst so the caller sees it in xd->dst.
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the YUV predictor.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2659 
2660 /*!\brief AV1 inter mode RD computation
2661  *
2662  * \ingroup inter_mode_search
2663  * Do the RD search for a given inter mode and compute all information relevant
2664  * to the input mode. It will compute the best MV,
2665  * compound parameters (if the mode is a compound mode) and interpolation filter
2666  * parameters.
2667  *
2668  * \param[in]     cpi               Top-level encoder structure.
2669  * \param[in]     tile_data         Pointer to struct holding adaptive
2670  *                                  data/contexts/models for the tile during
2671  *                                  encoding.
2672  * \param[in]     x                 Pointer to structure holding all the data
2673  *                                  for the current macroblock.
2674  * \param[in]     bsize             Current block size.
2675  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2676  *                                  information.
2677  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2678  *                                  for only the Y plane.
2679  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2680  *                                  for only the UV planes.
2681  * \param[in]     args              HandleInterModeArgs struct holding
2682  *                                  miscellaneous arguments for inter mode
2683  *                                  search. See the documentation for this
2684  *                                  struct for a description of each member.
2685  * \param[in]     ref_best_rd       Best RD found so far for this block.
2686  *                                  It is used for early termination of this
2687  *                                  search if the RD exceeds this value.
2688  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2689  *                                  built in this search.
2690  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2691  *                                  allocated buffers for the compound
2692  *                                  predictors and masks in the compound type
2693  *                                  search.
2694  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2695  *                                  do_tx_search (see below) is 0.
2696  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2697  *                                  a full transform search. This will compute
2698  *                                  an estimated RD for the modes without the
2699  *                                  transform search and later perform the full
2700  *                                  transform search on the best candidates.
2701  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2702  *                                  information to perform a full transform
2703  *                                  search only on winning candidates searched
2704  *                                  with an estimate for transform coding RD.
2705  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2706  *                                  motion mode information used in a speed
2707  *                                  feature to search motion modes other than
2708  *                                  SIMPLE_TRANSLATION only on winning
2709  *                                  candidates.
2710  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2711  *                                  best total RD for a skip mode so far, and
2712  *                                  skip_rd[1] is the best RD for a skip mode so
2713  *                                  far in luma. This is used as a speed feature
2714  *                                  to skip the transform search if the computed
2715  *                                  skip RD for the current mode is not better
2716  *                                  than the best skip_rd so far.
2717  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2718  *                                         narrow down the search based on data
2719  *                                         collected in the TPL model.
2720  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2721  *                                  the luma plane.
2722  *
2723  * \return The RD cost for the mode being searched.
2724  */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  // TPL-based pruning is only available when TPL stats have been gathered for
  // this GF group index.
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode. A negative second ref (NONE_FRAME) is
  // mapped to 0 so refs[1] can safely index per-reference arrays.
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  // tmp_buf is partitioned into three MAX_SB_SQUARE-sized plane buffers
  // (Y, U, V), each with stride MAX_SB_SIZE.
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of ref_mv indices worth a full search (set by the simple
  // translation pre-search).
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate cost common to every ref_mv_idx candidate of this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  // As per the experiments, in real-time preset impact of model rd based
  // breakouts is less on encoding time if the following conditions are true.
  //    (1) compound mode is disabled
  //    (2) interpolation filter search is disabled
  // TODO(any): Check the impact of model rd based breakouts in other presets
  const int skip_interp_search_modelrd_calc =
      cpi->oxcf.mode == REALTIME &&
      cm->current_frame.reference_mode == SINGLE_REFERENCE &&
      (cpi->sf.rt_sf.skip_interp_filter_search ||
       cpi->sf.winner_mode_sf.winner_mode_ifs);

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will  iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      // Non-zero return means the NEWMV search failed for this candidate.
      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        // Speed feature: drop NEWMV candidates whose prediction SSE is not
        // competitive with the best SSE seen so far, scaled per block size.
        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      // Account for the rate of coding the new motion vector(s).
      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early breakout: the mode/MV rate alone already exceeds the best RD.
    // NEAREST modes are exempt so at least a baseline candidate survives.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!skip_interp_search_modelrd_calc) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the single-ref modelled RD so compound modes of the same refs
      // can be pruned against it below.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the RD bound used for pruning the remaining candidates.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    // Reset the destination buffer pointers for the next iteration.
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3087 
3088 /*!\brief Search for the best intrabc predictor
3089  *
3090  * \ingroup intra_mode_search
3091  * \callergraph
3092  * This function performs a motion search to find the best intrabc predictor.
3093  *
3094  * \returns Returns the best overall rdcost (including the non-intrabc modes
3095  * search before this function).
3096  */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC is only searched when the stream allows it and the nonrd path
  // (which has its own mode decision) is not in use.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  // Derive a displacement-vector (DV) predictor from the regular MV reference
  // machinery, treating INTRA_FRAME as the reference.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Prefer the nearest candidate; fall back to a tile-default DV when both
  // candidates are zero.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the already-reconstructed part of the current
  // frame, so point the prediction sources at the current buffer.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, lookahead_search_sites,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // Search two source regions: the rows above the current superblock, and the
  // area to the left within the current superblock row.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // Snapshot the geometry-derived limits so we can verify that
    // av1_set_mv_search_range() only narrows them. (The previous asserts
    // compared each field against itself and were vacuously true.)
#ifndef NDEBUG
    const FullMvLimits geom_mv_limits = fullms_params.mv_limits;
#endif  // NDEBUG

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    assert(fullms_params.mv_limits.col_min >= geom_mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= geom_mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= geom_mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= geom_mv_limits.row_max);

    // An empty search window means no valid DV exists in this direction.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;

    // Full-pel search, then a hash-based search; keep whichever is cheaper.
    int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
                                        NULL, &best_mv.as_fullmv, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intraBC block and build its predictor.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner (which may be the non-intraBC mode passed in).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3283 
// TODO(chiyotsai@google.com): We are using the underlying struct types (e.g.
// struct AV1_COMP, struct macroblock) instead of their typedefs here because
// Doxygen doesn't know about the typedefs yet. So using the typedefs will
// prevent doxygen from finding this function and generating the callgraph.
// Once documents for AV1_COMP and MACROBLOCK are added to doxygen, we can
// revert back to using the typedefs.
// Picks the best intra (and intraBC, when allowed) mode for the block: first
// the luma intra search, then chroma if present, then an intraBC search that
// may override both. Results are written to rd_cost and ctx.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Start from a clean intra configuration.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra mode search; fills rate_y/dist_y and the luma skip flag.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search did not beat the incoming bound; mark the result invalid.
    rd_cost->rate = INT_MAX;
  }

  // Let intraBC compete against the intra result found above.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    // IntraBC won: propagate its skip/blk_skip state into the context.
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info for the caller.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3357 
3358 static AOM_INLINE void calc_target_weighted_pred(
3359     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3360     const uint8_t *above, int above_stride, const uint8_t *left,
3361     int left_stride);
3362 
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3363 static AOM_INLINE void rd_pick_skip_mode(
3364     RD_STATS *rd_cost, InterModeSearchState *search_state,
3365     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3366     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3367   const AV1_COMMON *const cm = &cpi->common;
3368   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3369   const int num_planes = av1_num_planes(cm);
3370   MACROBLOCKD *const xd = &x->e_mbd;
3371   MB_MODE_INFO *const mbmi = xd->mi[0];
3372 
3373   x->compound_idx = 1;  // COMPOUND_AVERAGE
3374   RD_STATS skip_mode_rd_stats;
3375   av1_invalid_rd_stats(&skip_mode_rd_stats);
3376 
3377   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3378       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3379     return;
3380   }
3381 
3382   const MV_REFERENCE_FRAME ref_frame =
3383       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3384   const MV_REFERENCE_FRAME second_ref_frame =
3385       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3386   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3387   const THR_MODES mode_index =
3388       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3389 
3390   if (mode_index == THR_INVALID) {
3391     return;
3392   }
3393 
3394   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3395        cpi->sf.inter_sf.disable_onesided_comp) &&
3396       cpi->all_one_sided_refs) {
3397     return;
3398   }
3399 
3400   mbmi->mode = this_mode;
3401   mbmi->uv_mode = UV_DC_PRED;
3402   mbmi->ref_frame[0] = ref_frame;
3403   mbmi->ref_frame[1] = second_ref_frame;
3404   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3405   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3406     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3407     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3408         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3409       return;
3410     }
3411     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3412                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3413                      mbmi_ext->mode_context);
3414     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3415     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3416     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3417   }
3418 
3419   assert(this_mode == NEAREST_NEARESTMV);
3420   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3421     return;
3422   }
3423 
3424   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3425   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3426   mbmi->comp_group_idx = 0;
3427   mbmi->compound_idx = x->compound_idx;
3428   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3429   mbmi->motion_mode = SIMPLE_TRANSLATION;
3430   mbmi->ref_mv_idx = 0;
3431   mbmi->skip_mode = mbmi->skip_txfm = 1;
3432   mbmi->palette_mode_info.palette_size[0] = 0;
3433   mbmi->palette_mode_info.palette_size[1] = 0;
3434 
3435   set_default_interp_filters(mbmi, cm->features.interp_filter);
3436 
3437   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3438   for (int i = 0; i < num_planes; i++) {
3439     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3440     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3441   }
3442 
3443   BUFFER_SET orig_dst;
3444   for (int i = 0; i < num_planes; i++) {
3445     orig_dst.plane[i] = xd->plane[i].dst.buf;
3446     orig_dst.stride[i] = xd->plane[i].dst.stride;
3447   }
3448 
3449   // Obtain the rdcost for skip_mode.
3450   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3451 
3452   // Compare the use of skip_mode with the best intra/inter mode obtained.
3453   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3454   int64_t best_intra_inter_mode_cost = INT64_MAX;
3455   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3456     const ModeCosts *mode_costs = &x->mode_costs;
3457     best_intra_inter_mode_cost = RDCOST(
3458         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3459         rd_cost->dist);
3460     // Account for non-skip mode rate in total rd stats
3461     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3462     av1_rd_cost_update(x->rdmult, rd_cost);
3463   }
3464 
3465   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3466       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3467     assert(mode_index != THR_INVALID);
3468     search_state->best_mbmode.skip_mode = 1;
3469     search_state->best_mbmode = *mbmi;
3470     memset(search_state->best_mbmode.inter_tx_size,
3471            search_state->best_mbmode.tx_size,
3472            sizeof(search_state->best_mbmode.inter_tx_size));
3473     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3474                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3475                   xd);
3476     search_state->best_mode_index = mode_index;
3477 
3478     // Update rd_cost
3479     rd_cost->rate = skip_mode_rd_stats.rate;
3480     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3481     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3482 
3483     search_state->best_rd = rd_cost->rdcost;
3484     search_state->best_skip2 = 1;
3485     search_state->best_mode_skippable = 1;
3486 
3487     x->txfm_search_info.skip_txfm = 1;
3488   }
3489 }
3490 
3491 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3492 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3493     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3494     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3495     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3496     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3497     int mode_idx) {
3498   MB_MODE_INFO *winner_mbmi;
3499   if (multi_winner_mode_type) {
3500     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3501     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3502     winner_mbmi = &winner_mode_stat->mbmi;
3503 
3504     *winner_rd_cost = &winner_mode_stat->rd_cost;
3505     *winner_rate_y = winner_mode_stat->rate_y;
3506     *winner_rate_uv = winner_mode_stat->rate_uv;
3507     *winner_mode_index = winner_mode_stat->mode_index;
3508   } else {
3509     winner_mbmi = best_mbmode;
3510     *winner_rd_cost = best_rd_cost;
3511     *winner_rate_y = best_rate_y;
3512     *winner_rate_uv = best_rate_uv;
3513     *winner_mode_index = *best_mode_index;
3514   }
3515   return winner_mbmi;
3516 }
3517 
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
//
// For each recorded winner mode (one mode in the single-winner path, up to
// 'winner_mode_count' in the multi-winner path), the prediction is rebuilt,
// a full transform search is run, and 'rd_cost' / 'best_mbmode' /
// 'best_mode_index' / 'best_skip2' plus the context buffers in 'ctx' are
// updated whenever the refined RD cost improves on the current best.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  // RD cost of the best mode before refinement; refinement must beat this.
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Lossless blocks are excluded: their transform choice is fixed.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Make the winner mode the current mode so the transform search and
      // predictor builders below operate on it.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          // fast_interp_search also builds the predictor when it succeeds.
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Full transform-type/size search on luma: recursive partitioning
        // when TX_MODE_SELECT allows it, uniform tx size otherwise.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          // Uniform tx size: propagate the chosen size and skip decision to
          // every 4x4 unit of the block.
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // For inter blocks, compare coding the residual against signalling
      // skip_txfm; take skip when it is cheaper in RD terms.
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the stored y/uv rates with the refined ones to form the
      // total rate for this candidate.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3662 
/*!\cond */
// Per-block pruning mask for the inter mode search: which prediction modes
// and which reference-frame combinations should NOT be evaluated.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3675 
3676 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3677 static AOM_INLINE void disable_reference(
3678     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3679   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3680     ref_combo[ref][ref2 + 1] = true;
3681   }
3682 }
3683 
3684 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3685 static AOM_INLINE void disable_inter_references_except_altref(
3686     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3687   disable_reference(LAST_FRAME, ref_combo);
3688   disable_reference(LAST2_FRAME, ref_combo);
3689   disable_reference(LAST3_FRAME, ref_combo);
3690   disable_reference(GOLDEN_FRAME, ref_combo);
3691   disable_reference(BWDREF_FRAME, ref_combo);
3692   disable_reference(ALTREF2_FRAME, ref_combo);
3693 }
3694 
// Reduced set of (first_ref, second_ref) combinations re-enabled by
// default_skip_mask() when the encoder runs with a reduced reference set.
// NONE_FRAME as the second entry denotes single-reference prediction;
// presumably the INTRA_FRAME pairings select inter-intra style candidates —
// TODO(review): confirm against the mode enumeration.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Which reference-combination set the mode search draws from.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3707 
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3708 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3709                                          REF_SET ref_set) {
3710   if (ref_set == REF_SET_FULL) {
3711     // Everything available by default.
3712     memset(mask, 0, sizeof(*mask));
3713   } else {
3714     // All modes available by default.
3715     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3716     // All references disabled first.
3717     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3718       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3719         mask->ref_combo[ref1][ref2 + 1] = true;
3720       }
3721     }
3722     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3723     int num_ref_combos;
3724 
3725     // Then enable reduced set of references explicitly.
3726     switch (ref_set) {
3727       case REF_SET_REDUCED:
3728         ref_set_combos = reduced_ref_combos;
3729         num_ref_combos =
3730             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3731         break;
3732       case REF_SET_REALTIME:
3733         ref_set_combos = real_time_ref_combos;
3734         num_ref_combos =
3735             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3736         break;
3737       default: assert(0); num_ref_combos = 0;
3738     }
3739 
3740     for (int i = 0; i < num_ref_combos; ++i) {
3741       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3742       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3743     }
3744   }
3745 }
3746 
// Populate 'mask' with the prediction modes and reference combinations to
// skip for the current block, based on the active reference set, segment
// features, reference availability, and motion-vector SAD based pruning
// heuristics from the speed features.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  // Choose which reference set to search: real-time subset takes priority
  // over the config-driven reduced set.
  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      // (pred_mv_sad more than 4x the best reference's SAD).
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      // Start from "everything except nearest/near/global masked", then
      // re-mask NEAR/NEAREST when they differ from the global MV.
      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      // Restrict the search to ALTREF-only inter modes.
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold: best past-frame SAD plus 12.5%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          sf->inter_sf.alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (sf->inter_sf.alt_ref_search_fp == 1 &&
              (abs(cpi->ref_frame_dist_info
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
               1.5 * abs(cpi->ref_frame_dist_info
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold: best past-frame SAD plus 50%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Intra prediction is not searched above the configured maximum block size.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Mask out the intra y modes that the speed feature disallows for this
  // transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
}
3890 
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3891 static AOM_INLINE void init_neighbor_pred_buf(
3892     const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3893     int is_hbd) {
3894   if (is_hbd) {
3895     const int len = sizeof(uint16_t);
3896     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3897     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3898                                                  (MAX_SB_SQUARE >> 1) * len);
3899     args->above_pred_buf[2] =
3900         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3901     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3902     args->left_pred_buf[1] =
3903         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3904     args->left_pred_buf[2] =
3905         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3906   } else {
3907     args->above_pred_buf[0] = obmc_buffer->above_pred;
3908     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3909     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3910     args->left_pred_buf[0] = obmc_buffer->left_pred;
3911     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3912     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3913   }
3914 }
3915 
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3916 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3917                                       MV_REFERENCE_FRAME ref_frame) {
3918   const AV1_COMMON *const cm = &cpi->common;
3919   MV_REFERENCE_FRAME rf[2];
3920   av1_set_ref_frame(rf, ref_frame);
3921 
3922   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3923 
3924   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3925                                        cm->cur_frame->ref_display_order_hint)) {
3926     return 1;
3927   }
3928 
3929   return 0;
3930 }
3931 
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3932 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3933     int ref_frame, int skip_ref_frame_mask) {
3934   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3935     if (!(skip_ref_frame_mask & (1 << r))) {
3936       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3937       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3938         return 1;
3939       }
3940     }
3941   }
3942   return 0;
3943 }
3944 
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3945 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3946                                                  const MB_MODE_INFO *mi_cache) {
3947   if (!mi_cache) {
3948     return 0;
3949   }
3950 
3951   if (ref_frame < REF_FRAMES) {
3952     return (ref_frame == mi_cache->ref_frame[0] ||
3953             ref_frame == mi_cache->ref_frame[1]);
3954   }
3955 
3956   // if we are here, then the current mode is compound.
3957   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3958   return ref_frame == cached_ref_type;
3959 }
3960 
3961 // Please add/modify parameter setting in this function, making it consistent
3962 // and easy to read and maintain.
set_params_rd_pick_inter_mode(const AV1_COMP * cpi,MACROBLOCK * x,HandleInterModeArgs * args,BLOCK_SIZE bsize,mode_skip_mask_t * mode_skip_mask,int skip_ref_frame_mask,unsigned int * ref_costs_single,unsigned int (* ref_costs_comp)[REF_FRAMES],struct buf_2d (* yv12_mb)[MAX_MB_PLANE])3963 static AOM_INLINE void set_params_rd_pick_inter_mode(
3964     const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
3965     BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
3966     unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
3967     struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
3968   const AV1_COMMON *const cm = &cpi->common;
3969   MACROBLOCKD *const xd = &x->e_mbd;
3970   MB_MODE_INFO *const mbmi = xd->mi[0];
3971   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3972   unsigned char segment_id = mbmi->segment_id;
3973 
3974   init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
3975   av1_collect_neighbors_ref_counts(xd);
3976   estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
3977                            ref_costs_comp);
3978 
3979   const int mi_row = xd->mi_row;
3980   const int mi_col = xd->mi_col;
3981   x->best_pred_mv_sad[0] = INT_MAX;
3982   x->best_pred_mv_sad[1] = INT_MAX;
3983 
3984   for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
3985        ++ref_frame) {
3986     x->pred_mv_sad[ref_frame] = INT_MAX;
3987     mbmi_ext->mode_context[ref_frame] = 0;
3988     mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
3989     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
3990       // Skip the ref frame if the mask says skip and the ref is not used by
3991       // compound ref.
3992       if (skip_ref_frame_mask & (1 << ref_frame) &&
3993           !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
3994           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
3995         continue;
3996       }
3997       assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
3998       setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
3999     }
4000     if (cpi->sf.inter_sf.alt_ref_search_fp ||
4001         cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4002       // Store the best pred_mv_sad across all past frames
4003       if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4004           0)
4005         x->best_pred_mv_sad[0] =
4006             AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4007       else
4008         // Store the best pred_mv_sad across all future frames
4009         x->best_pred_mv_sad[1] =
4010             AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4011     }
4012   }
4013 
4014   if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4015     // No second reference on RT ref set, so no need to initialize
4016     for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4017          ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4018       mbmi_ext->mode_context[ref_frame] = 0;
4019       mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4020       const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4021       if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4022             (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4023         continue;
4024       }
4025 
4026       if (skip_ref_frame_mask & (1 << ref_frame) &&
4027           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4028         continue;
4029       }
4030       // Ref mv list population is not required, when compound references are
4031       // pruned.
4032       if (prune_ref_frame(cpi, x, ref_frame)) continue;
4033 
4034       av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4035                        xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4036                        mbmi_ext->mode_context);
4037       // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4038       // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4039       av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4040     }
4041   }
4042 
4043   av1_count_overlappable_neighbors(cm, xd);
4044   const FRAME_UPDATE_TYPE update_type =
4045       get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4046   int use_actual_frame_probs = 1;
4047   int prune_obmc;
4048 #if CONFIG_FPMT_TEST
4049   use_actual_frame_probs =
4050       (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4051   if (!use_actual_frame_probs) {
4052     prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4053                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4054   }
4055 #endif
4056   if (use_actual_frame_probs) {
4057     prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4058                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4059   }
4060   if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4061     if (check_num_overlappable_neighbors(mbmi) &&
4062         is_motion_variation_allowed_bsize(bsize)) {
4063       int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4064       int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4065                                        MAX_SB_SIZE >> 1 };
4066       int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4067                                         MAX_SB_SIZE >> 1 };
4068       int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4069       av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4070                                           dst_width1, dst_height1,
4071                                           args->above_pred_stride);
4072       av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4073                                          dst_width2, dst_height2,
4074                                          args->left_pred_stride);
4075       const int num_planes = av1_num_planes(cm);
4076       av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4077                            mi_col, 0, num_planes);
4078       calc_target_weighted_pred(
4079           cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4080           args->left_pred_buf[0], args->left_pred_stride[0]);
4081     }
4082   }
4083 
4084   init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4085 
4086   // Set params for mode evaluation
4087   set_mode_eval_params(cpi, x, MODE_EVAL);
4088 
4089   x->comp_rd_stats_idx = 0;
4090 
4091   for (int idx = 0; idx < REF_FRAMES; idx++) {
4092     args->best_single_sse_in_refs[idx] = INT32_MAX;
4093   }
4094 }
4095 
init_single_inter_mode_search_state(InterModeSearchState * search_state)4096 static AOM_INLINE void init_single_inter_mode_search_state(
4097     InterModeSearchState *search_state) {
4098   for (int dir = 0; dir < 2; ++dir) {
4099     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4100       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4101         SingleInterModeState *state;
4102 
4103         state = &search_state->single_state[dir][mode][ref_frame];
4104         state->ref_frame = NONE_FRAME;
4105         state->rd = INT64_MAX;
4106 
4107         state = &search_state->single_state_modelled[dir][mode][ref_frame];
4108         state->ref_frame = NONE_FRAME;
4109         state->rd = INT64_MAX;
4110 
4111         search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4112       }
4113     }
4114   }
4115 
4116   for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4117     search_state->best_single_rd[ref_frame] = INT64_MAX;
4118     search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4119   }
4120   av1_zero(search_state->single_state_cnt);
4121   av1_zero(search_state->single_state_modelled_cnt);
4122 }
4123 
init_inter_mode_search_state(InterModeSearchState * search_state,const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize,int64_t best_rd_so_far)4124 static AOM_INLINE void init_inter_mode_search_state(
4125     InterModeSearchState *search_state, const AV1_COMP *cpi,
4126     const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4127   init_intra_mode_search_state(&search_state->intra_search_state);
4128   av1_invalid_rd_stats(&search_state->best_y_rdcost);
4129 
4130   search_state->best_rd = best_rd_so_far;
4131   search_state->best_skip_rd[0] = INT64_MAX;
4132   search_state->best_skip_rd[1] = INT64_MAX;
4133 
4134   av1_zero(search_state->best_mbmode);
4135 
4136   search_state->best_rate_y = INT_MAX;
4137 
4138   search_state->best_rate_uv = INT_MAX;
4139 
4140   search_state->best_mode_skippable = 0;
4141 
4142   search_state->best_skip2 = 0;
4143 
4144   search_state->best_mode_index = THR_INVALID;
4145 
4146   const MACROBLOCKD *const xd = &x->e_mbd;
4147   const MB_MODE_INFO *const mbmi = xd->mi[0];
4148   const unsigned char segment_id = mbmi->segment_id;
4149 
4150   search_state->num_available_refs = 0;
4151   memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4152   memset(search_state->dist_order_refs, -1,
4153          sizeof(search_state->dist_order_refs));
4154 
4155   for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4156     search_state->mode_threshold[i] = 0;
4157   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
4158   for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4159     search_state->mode_threshold[i] =
4160         ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4161         RD_THRESH_FAC_FRAC_BITS;
4162 
4163   search_state->best_intra_rd = INT64_MAX;
4164 
4165   search_state->best_pred_sse = UINT_MAX;
4166 
4167   av1_zero(search_state->single_newmv);
4168   av1_zero(search_state->single_newmv_rate);
4169   av1_zero(search_state->single_newmv_valid);
4170   for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4171     for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4172       for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4173         search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4174         search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4175       }
4176     }
4177   }
4178 
4179   for (int i = 0; i < REFERENCE_MODES; ++i) {
4180     search_state->best_pred_rd[i] = INT64_MAX;
4181   }
4182 
4183   if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4184     for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4185       search_state->mode_threshold[i] =
4186           ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4187           RD_THRESH_FAC_FRAC_BITS;
4188 
4189     for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4190       for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4191         for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4192           search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4193           search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4194         }
4195       }
4196     }
4197 
4198     init_single_inter_mode_search_state(search_state);
4199   }
4200 }
4201 
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4202 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4203                            const MV_REFERENCE_FRAME *ref_frame,
4204                            const PREDICTION_MODE this_mode) {
4205   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4206     return true;
4207   }
4208 
4209   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4210 }
4211 
// Returns 1 if the (mode, reference pair) combination is not usable for the
// current block, 0 otherwise.  Validates compound-prediction legality and
// inter-intra compatibility.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  if (ref_frames[1] > INTRA_FRAME) {
    // Compound prediction: both the block size and the second reference must
    // be legal, and the frame must allow compound references at all.
    const AV1_COMMON *const cm = &cpi->common;
    if (!is_comp_ref_allowed(bsize)) return 1;
    if ((cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]) == 0)
      return 1;
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra prediction: both the block size and the mode must support it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME &&
      (!is_interintra_allowed_bsize(bsize) ||
       !is_interintra_allowed_mode(curr_mode))) {
    return 1;
  }

  return 0;
}
4244 
// Collect the union of reference-frame masks already picked for every mi unit
// covered by the current block within its superblock.
//
// x->picked_ref_frames_mask is a per-superblock grid of bitmasks indexed in
// mi units; this returns the bitwise OR of the entries the block spans.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  // Row stride (in mi units) of the picked_ref_frames_mask grid; previously a
  // bare '32' in the index arithmetic below.  The grid covers a 32x32-mi
  // superblock area.
  enum { PICKED_REF_FRAMES_MASK_STRIDE = 32 };
  const int sb_size_mask = mib_size - 1;
  const MACROBLOCKD *const xd = &x->e_mbd;
  // Position of the block inside its superblock, in mi units.
  const int mi_row_in_sb = xd->mi_row & sb_size_mask;
  const int mi_col_in_sb = xd->mi_col & sb_size_mask;
  const int mi_w = mi_size_wide[bsize];
  const int mi_h = mi_size_high[bsize];
  int picked_ref_frames_mask = 0;
  for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
    for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
      picked_ref_frames_mask |=
          x->picked_ref_frames_mask[i * PICKED_REF_FRAMES_MASK_STRIDE + j];
    }
  }
  return picked_ref_frames_mask;
}
4263 
4264 // Check if reference frame pair of the current block matches with the given
4265 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4266 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4267                                        const MV_REFERENCE_FRAME *ref_frames) {
4268   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4269           (ref_frames[1] == mbmi->ref_frame[1]));
4270 }
4271 
// Decides, independently of search order, whether the given (mode, reference
// pair) should be skipped for the current block.  The checks run from cheap
// masks to progressively more specific pruning heuristics.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Fast rejection via the precomputed mode/reference skip masks.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  // Skip modes whose motion vectors duplicate an already-searched candidate.
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip completely, 2 = keep the simple-translation search but
        // skip motion mode search (the compound cache entry needs its mv).
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows select the aggressiveness level (speed feature value - 1),
      // columns the qindex sub-range; entry is the minimum number of
      // neighbors whose reference pair must match to keep the mode.
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4426 
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4427 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4428                              const MV_REFERENCE_FRAME *ref_frames,
4429                              const AV1_COMMON *cm) {
4430   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4431   mbmi->ref_mv_idx = 0;
4432   mbmi->mode = curr_mode;
4433   mbmi->uv_mode = UV_DC_PRED;
4434   mbmi->ref_frame[0] = ref_frames[0];
4435   mbmi->ref_frame[1] = ref_frames[1];
4436   pmi->palette_size[0] = 0;
4437   pmi->palette_size[1] = 0;
4438   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4439   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4440   mbmi->motion_mode = SIMPLE_TRANSLATION;
4441   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4442   set_default_interp_filters(mbmi, cm->features.interp_filter);
4443 }
4444 
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4445 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4446                                              InterModeSearchState *search_state,
4447                                              const MB_MODE_INFO *const mbmi) {
4448   int i, j;
4449   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4450   const PREDICTION_MODE this_mode = mbmi->mode;
4451   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4452   const int mode_offset = INTER_OFFSET(this_mode);
4453   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4454 
4455   // Simple rd
4456   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4457   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4458     const int64_t rd =
4459         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4460     if (rd < simple_rd) simple_rd = rd;
4461   }
4462 
4463   // Insertion sort of single_state
4464   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4465   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4466   i = search_state->single_state_cnt[dir][mode_offset];
4467   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4468     state_s[j] = state_s[j - 1];
4469   state_s[j] = this_state_s;
4470   search_state->single_state_cnt[dir][mode_offset]++;
4471 
4472   // Modelled rd
4473   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4474   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4475     const int64_t rd =
4476         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4477     if (rd < modelled_rd) modelled_rd = rd;
4478   }
4479 
4480   // Insertion sort of single_state_modelled
4481   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4482   SingleInterModeState *state_m =
4483       search_state->single_state_modelled[dir][mode_offset];
4484   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4485   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4486     state_m[j] = state_m[j - 1];
4487   state_m[j] = this_state_m;
4488   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4489 }
4490 
// Post-process the collected single-reference mode states: mark unlikely
// reference frames invalid based on rd relative to the best NEWMV/GLOBALMV
// result, then build single_rd_order (the per-direction, per-mode preference
// order of references) used when pruning compound modes.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level uses a tighter factor (6/8 vs 5/8 of rd).
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // Entry 0 (the best per mode) is deliberately never invalidated.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same pruning applied to the modelled-rd states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First pass: valid references in ascending simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Second pass: fill remaining slots from the modelled-rd ordering,
      // skipping references already chosen or invalidated by simple rd.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4579 
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4580 static int compound_skip_get_candidates(
4581     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4582     const int dir, const PREDICTION_MODE mode) {
4583   const int mode_offset = INTER_OFFSET(mode);
4584   const SingleInterModeState *state =
4585       search_state->single_state[dir][mode_offset];
4586   const SingleInterModeState *state_modelled =
4587       search_state->single_state_modelled[dir][mode_offset];
4588 
4589   int max_candidates = 0;
4590   for (int i = 0; i < FWD_REFS; ++i) {
4591     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4592     max_candidates++;
4593   }
4594 
4595   int candidates = max_candidates;
4596   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4597     candidates = AOMMIN(2, max_candidates);
4598   }
4599   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4600     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4601         state[0].ref_frame == state_modelled[0].ref_frame)
4602       candidates = 1;
4603     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4604   }
4605 
4606   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4607     // Limit the number of candidates to 1 in each direction for compound
4608     // prediction
4609     candidates = AOMMIN(1, candidates);
4610   }
4611   return candidates;
4612 }
4613 
// Decide whether to skip a compound mode based on the outcome of the
// corresponding single-reference mode searches.  Returns 1 to skip the
// compound mode, 0 to keep searching it.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // The two single-reference modes that make up this compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component reference was actually searched as a
  // single mode (i.e. appears in the collected state list).
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEAREST/NEAR components, verify the single-mode mv equals the mv the
  // compound mode would use for every ref-mv candidate; otherwise the single
  // result is not predictive of the compound one.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if either component reference failed to make the
  // candidate list derived from single-mode results.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4678 
4679 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4680 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4681                                    const MV_REFERENCE_FRAME *ref_frames,
4682                                    int *const is_ref_match) {
4683   if (is_inter_block(mbmi)) {
4684     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4685     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4686     if (has_second_ref(mbmi)) {
4687       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4688       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4689     }
4690   }
4691 }
4692 
4693 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4694 static INLINE int compound_skip_using_neighbor_refs(
4695     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4696     const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4697   // Exclude non-extended compound modes from pruning
4698   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4699       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4700     return 0;
4701 
4702   if (prune_ext_comp_using_neighbors >= 3) return 1;
4703 
4704   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4705                                 // 1 - match for backward refs
4706   // Check if ref frames of this block matches with left neighbor.
4707   if (xd->left_available)
4708     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4709 
4710   // Check if ref frames of this block matches with above neighbor.
4711   if (xd->up_available)
4712     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4713 
4714   // Combine ref frame match with neighbors in forward and backward refs.
4715   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4716 
4717   // Pruning based on ref frame match with neighbors.
4718   if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4719   return 1;
4720 }
4721 
4722 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4723 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4724                                            const PREDICTION_MODE this_mode,
4725                                            const MV_REFERENCE_FRAME ref_frame,
4726                                            int64_t this_rd) {
4727   if (this_rd < search_state->best_single_rd[ref_frame]) {
4728     search_state->best_single_rd[ref_frame] = this_rd;
4729     search_state->best_single_mode[ref_frame] = this_mode;
4730   }
4731 }
4732 
4733 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4734 static INLINE int skip_compound_using_best_single_mode_ref(
4735     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4736     const PREDICTION_MODE *best_single_mode,
4737     int prune_comp_using_best_single_mode_ref) {
4738   // Exclude non-extended compound modes from pruning
4739   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4740       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4741     return 0;
4742 
4743   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4744   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4745   // Get ref frame direction corresponding to NEWMV
4746   // 0 - NEWMV corresponding to forward direction
4747   // 1 - NEWMV corresponding to backward direction
4748   const int newmv_dir = comp_mode_ref0 != NEWMV;
4749 
4750   // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4751   // have NEWMV as single mode winner.
4752   // Example: For an extended-compound mode,
4753   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4754   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4755   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4756   //   ALTREF_FRAME is NEWMV
4757   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4758   if (single_mode == NEWMV) return 0;
4759 
4760   // Avoid pruning the compound mode when best single mode is not available
4761   if (prune_comp_using_best_single_mode_ref == 1)
4762     if (single_mode == MB_MODE_COUNT) return 0;
4763   return 1;
4764 }
4765 
// qsort comparator for int64_t values, ascending order.
// Returns a negative, zero, or positive value as *a is less than, equal to,
// or greater than *b.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Branch-free three-way comparison; avoids overflow of lhs - rhs.
  return (lhs > rhs) - (lhs < rhs);
}
4777 
// Records a new overall best mode in the search state: copies the winning RD
// stats, mode info, and skip decisions, and snapshots the per-4x4 blk_skip
// and tx_type maps into ctx so the winner's transform decisions are kept.
// txfm_search_done indicates whether a real transform search (as opposed to
// an RD estimate) produced new_best_rd_stats_y/uv.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Only honor the skip_txfm flag for inter modes; it is ignored when the
  // new best mode is intra.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip_txfm signaling cost into the luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve the winner's per-4x4 transform decisions in the mode context.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4812 
4813 // Find the best RD for a reference frame (among single reference modes)
4814 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4815 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4816   assert(ref_frame_rd[0] == INT64_MAX);
4817   int64_t ref_copy[REF_FRAMES - 1];
4818   memcpy(ref_copy, ref_frame_rd + 1,
4819          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4820   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4821 
4822   int64_t cutoff = ref_copy[0];
4823   // The cut-off is within 10% of the best.
4824   if (cutoff != INT64_MAX) {
4825     assert(cutoff < INT64_MAX / 200);
4826     cutoff = (110 * cutoff) / 100;
4827   }
4828   ref_frame_rd[0] = cutoff;
4829 }
4830 
4831 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4832 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4833                                         MV_REFERENCE_FRAME frame1,
4834                                         MV_REFERENCE_FRAME frame2) {
4835   assert(frame2 > 0);
4836   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4837          ref_frame_rd[frame2] <= ref_frame_rd[0];
4838 }
4839 
// Re-evaluates the stored winner candidates (previously searched with only
// simple translation) through the full motion mode search (motion_mode_rd),
// and updates the overall search state whenever a candidate beats the
// current best RD.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore this candidate's mode info and rate bookkeeping.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the current destination buffers so motion_mode_rd can restore
    // them between motion mode trials.
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point each plane's prediction buffers at the candidate's ref frame(s).
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4917 
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when only the motion mode search should be skipped for
  // the current mode (the mode itself is still evaluated).
  int *skip_motion_mode;
  // Masks of modes / ref frames excluded from the search.
  mode_skip_mask_t *mode_skip_mask;
  // Running state of the inter mode search (best RD, best mode, stats).
  InterModeSearchState *search_state;
  // Bit mask of reference frames to skip.
  int skip_ref_frame_mask;
  // Set once the first compound mode is reached, after the single-reference
  // statistics have been analyzed for pruning.
  int reach_first_comp_mode;
  // Multiplier (in MODE_THRESH_QBITS fixed point) applied to the per-mode
  // threshold when the best mode so far is skippable.
  int mode_thresh_mul_fact;
  // Count of single-reference modes processed so far; compared against
  // NUM_SINGLE_REF_MODES before computing the compound pruning cutoff.
  int num_single_modes_processed;
  // Set once ref_frame_rd[0] holds the single-reference pruning cutoff.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4931 
// Decides whether the inter mode at position midx in the default mode order
// should be skipped, applying a sequence of speed-feature driven pruning
// checks. Returns 1 to skip the mode and 0 to evaluate it. As a side effect,
// *args->skip_motion_mode is set when only the motion mode search should be
// skipped for this mode.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are not handled by this function.
  if (ref_frame == INTRA_FRAME) return 1;

  // Optionally skip compound modes on ARF-update frames.
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: evaluate the mode but skip
  // its motion mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  // Skip modes whose threshold exceeds the best RD found so far.
  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune extended compound modes based on ref frames of neighbor blocks.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune compound modes using the per-reference best single mode winners.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(x, this_mode, ref_frame_type))
      return 1;
  }

  // Real-time pruning: drop GOLDEN_FRAME modes when golden is stale and is
  // not the current best reference.
  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5048 
// Tracks the best RD seen so far for each reference mode (single, compound,
// and mode-select), accounting for the compound-mode signaling cost.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive the rate with and without the compound-mode signaling cost,
  // depending on whether the frame signals the reference mode.
  int64_t single_rate;
  int64_t hybrid_rate;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Update the record for whichever reference class this mode belongs to.
  const int ref_mode_idx = comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[ref_mode_idx])
    search_state->best_pred_rd[ref_mode_idx] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
5076 
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search. Updates
// search_state/rd_cost/ctx with the winner and writes the winner's luma RD
// to *yrd.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates by their estimated RD so the most promising come first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed-feature limit.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Candidates are sorted; stop once the estimate is clearly worse than
    // the top estimate.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond the per-mode threshold, allow at most one tx search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Fold the skip_txfm signaling cost into the luma RD for this mode.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5260 
// Indicates number of winner simple translation modes to be used
// (presumably indexed by the motion_mode_for_winner_cand speed-feature
// level -- TODO confirm against the callers).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5263 
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search. The list is kept sorted by ascending
// rd_cost and capped at max_winner_motion_mode_cand entries.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // Find the insertion position: before the first entry with a larger
  // rd_cost, or at the end if none is larger.
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift existing entries down one slot to make room; when the list is
    // full, the last entry drops off.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5306 
5307 /*!\brief Search intra modes in interframes
5308  *
5309  * \ingroup intra_mode_search
5310  *
5311  * This function searches for the best intra mode when the current frame is an
5312  * interframe. This function however does *not* handle luma palette mode.
5313  * Palette mode is currently handled by \ref av1_search_palette_mode.
5314  *
5315  * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
 * for the best chroma mode. Because palette mode is currently not handled
 * here, a cache of uv mode is stored in
5319  * InterModeSearchState::intra_search_state so it can be reused later by \ref
5320  * av1_search_palette_mode.
5321  *
5322  * \param[in,out] search_state      Struct keep track of the prediction mode
5323  *                                  search state in interframe.
5324  *
5325  * \param[in]     cpi               Top-level encoder structure.
5326  * \param[in,out] x                 Pointer to struct holding all the data for
5327  *                                  the current prediction block.
5328  * \param[out]    rd_cost           Stores the best rd_cost among all the
5329  *                                  prediction modes searched.
5330  * \param[in]     bsize             Current block size.
5331  * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5332  *                                  copy the tx_type and txfm_skip arrays.
5333  *                                  for only the Y plane.
5334  * \param[in]     sf_args           Stores the list of intra mode candidates
5335  *                                  to be searched.
5336  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5337  *                                      current ref frame is an intra frame.
5338  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5339  *                                  terminate chroma intra mode search.
5340  *
5341  * \remark If a new best mode is found, search_state and rd_costs are updated
5342  * correspondingly. While x is also modified, it is only used as a temporary
5343  * buffer, and the final decisions are stored in search_state.
5344  */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Bookkeeping for the best luma intra mode found in the loop below.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // The skip flag may be set mid-search (e.g. by ML pruning); honor it.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip smooth modes when disabled by config or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Nonzero angle deltas are only searched when angle-delta coding is
    // allowed for this block size and enabled in the config.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    // Evaluate this luma mode; intra_rd_y is only meaningful when
    // is_luma_result_valid is nonzero.
    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only treat intra as a contender when it beats the best inter luma rd
    // threshold; track the running best luma mode and its side data.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode beat the threshold: nothing to do for this block.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  // Add in the chroma mode signaling cost when this block carries chroma.
  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // Intra rd bounds the best prediction rd for every reference mode.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  // Update the overall search state if intra beats the best rd so far.
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5525 
5526 #if !CONFIG_REALTIME_ONLY
5527 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5528 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5529 static AOM_INLINE void calculate_cost_from_tpl_data(
5530     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5531     int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5532   const AV1_COMMON *const cm = &cpi->common;
5533   // Only consider full SB.
5534   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5535   const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5536   const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5537                   (block_size_high[sb_size] / tpl_bsize_1d);
5538   SuperBlockEnc *sb_enc = &x->sb_enc;
5539   if (sb_enc->tpl_data_count == len) {
5540     const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5541     const int tpl_stride = sb_enc->tpl_stride;
5542     const int tplw = mi_size_wide[tpl_bsize];
5543     const int tplh = mi_size_high[tpl_bsize];
5544     const int nw = mi_size_wide[bsize] / tplw;
5545     const int nh = mi_size_high[bsize] / tplh;
5546     if (nw >= 1 && nh >= 1) {
5547       const int of_h = mi_row % mi_size_high[sb_size];
5548       const int of_w = mi_col % mi_size_wide[sb_size];
5549       const int start = of_h / tplh * tpl_stride + of_w / tplw;
5550 
5551       for (int k = 0; k < nh; k++) {
5552         for (int l = 0; l < nw; l++) {
5553           *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5554           *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5555         }
5556       }
5557       *inter_cost /= nw * nh;
5558       *intra_cost /= nw * nh;
5559     }
5560   }
5561 }
5562 #endif  // !CONFIG_REALTIME_ONLY
5563 
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search.
static AOM_INLINE void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
    int64_t inter_cost, int64_t intra_cost) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
  // Heuristic 1: skip intra when the best single-ref inter mode has a small
  // motion vector and the source is not near-flat (variance > 128).
  if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
      bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
    const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
    const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
    if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
        x->source_variance > 128) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }

  // Remaining heuristics require the speed feature on and non-trivial
  // source variance.
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (!(skip_intra_in_interframe &&
        (x->source_variance > src_var_thresh_intra_skip)))
    return;

  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    // At level >= 3, any qindex qualifies; at level 2 only qindex <= 200.
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // Level >= 4: also skip when TPL cost features are unavailable.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Separate models for <=480p and larger resolutions.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    // Quantizer feature: ratio of max AC quant step to the current one.
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
5632 
get_block_temp_var(const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize)5633 static AOM_INLINE int get_block_temp_var(const AV1_COMP *cpi,
5634                                          const MACROBLOCK *x,
5635                                          BLOCK_SIZE bsize) {
5636   const AV1_COMMON *const cm = &cpi->common;
5637   const SPEED_FEATURES *const sf = &cpi->sf;
5638 
5639   if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5640       !sf->rt_sf.short_circuit_low_temp_var ||
5641       !sf->rt_sf.prune_inter_modes_using_temp_var) {
5642     return 0;
5643   }
5644 
5645   const int mi_row = x->e_mbd.mi_row;
5646   const int mi_col = x->e_mbd.mi_col;
5647   int is_low_temp_var = 0;
5648 
5649   if (cm->seq_params->sb_size == BLOCK_64X64)
5650     is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5651         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5652   else
5653     is_low_temp_var = av1_get_force_skip_low_temp_var(
5654         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5655 
5656   return is_low_temp_var;
5657 }
5658 
5659 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5660 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5661                             struct macroblock *x, struct RD_STATS *rd_cost,
5662                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5663                             int64_t best_rd_so_far) {
5664   AV1_COMMON *const cm = &cpi->common;
5665   const FeatureFlags *const features = &cm->features;
5666   const int num_planes = av1_num_planes(cm);
5667   const SPEED_FEATURES *const sf = &cpi->sf;
5668   MACROBLOCKD *const xd = &x->e_mbd;
5669   MB_MODE_INFO *const mbmi = xd->mi[0];
5670   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5671   int i;
5672   const ModeCosts *mode_costs = &x->mode_costs;
5673   const int *comp_inter_cost =
5674       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5675 
5676   InterModeSearchState search_state;
5677   init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5678   INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5679     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5680     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5681   };
5682   HandleInterModeArgs args = { { NULL },
5683                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5684                                { NULL },
5685                                { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5686                                  MAX_SB_SIZE >> 1 },
5687                                NULL,
5688                                NULL,
5689                                NULL,
5690                                search_state.modelled_rd,
5691                                INT_MAX,
5692                                INT_MAX,
5693                                search_state.simple_rd,
5694                                0,
5695                                interintra_modes,
5696                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5697                                { { 0, 0 } },
5698                                0,
5699                                0,
5700                                -1,
5701                                -1,
5702                                -1,
5703                                { 0 },
5704                                { 0 },
5705                                UINT_MAX };
5706   // Currently, is_low_temp_var is used in real time encoding.
5707   const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5708 
5709   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5710   // Indicates the appropriate number of simple translation winner modes for
5711   // exhaustive motion mode evaluation
5712   const int max_winner_motion_mode_cand =
5713       num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5714   assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5715   motion_mode_candidate motion_mode_cand;
5716   motion_mode_best_st_candidate best_motion_mode_cands;
5717   // Initializing the number of motion mode candidates to zero.
5718   best_motion_mode_cands.num_motion_mode_cand = 0;
5719   for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5720     best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5721 
5722   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5723 
5724   av1_invalid_rd_stats(rd_cost);
5725 
5726   for (i = 0; i < REF_FRAMES; ++i) {
5727     x->warp_sample_info[i].num = -1;
5728   }
5729 
5730   // Ref frames that are selected by square partition blocks.
5731   int picked_ref_frames_mask = 0;
5732   if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5733       mbmi->partition != PARTITION_NONE) {
5734     // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5735     // partition blocks. prune_ref_frame_for_rect_partitions >=2
5736     // implies prune for vert, horiz and extended partition blocks.
5737     if ((mbmi->partition != PARTITION_VERT &&
5738          mbmi->partition != PARTITION_HORZ) ||
5739         sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5740       picked_ref_frames_mask =
5741           fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5742     }
5743   }
5744 
5745 #if CONFIG_COLLECT_COMPONENT_TIMING
5746   start_timing(cpi, set_params_rd_pick_inter_mode_time);
5747 #endif
5748   // Skip ref frames that never selected by square blocks.
5749   const int skip_ref_frame_mask =
5750       picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5751   mode_skip_mask_t mode_skip_mask;
5752   unsigned int ref_costs_single[REF_FRAMES];
5753   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5754   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5755   // init params, set frame modes, speed features
5756   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5757                                 skip_ref_frame_mask, ref_costs_single,
5758                                 ref_costs_comp, yv12_mb);
5759 #if CONFIG_COLLECT_COMPONENT_TIMING
5760   end_timing(cpi, set_params_rd_pick_inter_mode_time);
5761 #endif
5762 
5763   int64_t best_est_rd = INT64_MAX;
5764   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5765   // If do_tx_search is 0, only estimated RD should be computed.
5766   // If do_tx_search is 1, all modes have TX search performed.
5767   const int do_tx_search =
5768       !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5769         (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5770          num_pels_log2_lookup[bsize] > 8));
5771   InterModesInfo *inter_modes_info = x->inter_modes_info;
5772   inter_modes_info->num = 0;
5773 
5774   // Temporary buffers used by handle_inter_mode().
5775   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5776 
5777   // The best RD found for the reference frame, among single reference modes.
5778   // Note that the 0-th element will contain a cut-off that is later used
5779   // to determine if we should skip a compound mode.
5780   int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5781                                        INT64_MAX, INT64_MAX, INT64_MAX,
5782                                        INT64_MAX, INT64_MAX };
5783 
5784   // Prepared stats used later to check if we could skip intra mode eval.
5785   int64_t inter_cost = -1;
5786   int64_t intra_cost = -1;
5787   // Need to tweak the threshold for hdres speed 0 & 1.
5788   const int mi_row = xd->mi_row;
5789   const int mi_col = xd->mi_col;
5790 
5791   // Obtain the relevant tpl stats for pruning inter modes
5792   PruneInfoFromTpl inter_cost_info_from_tpl;
5793 #if !CONFIG_REALTIME_ONLY
5794   if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5795     // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5796     // prune_ref_by_selective_ref_frame()
5797     // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5798     // prune_ref_by_selective_ref_frame()
5799     // Populating valid_refs[idx] = 1 ensures that
5800     // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5801     // pruned ref frame.
5802     int valid_refs[INTER_REFS_PER_FRAME];
5803     for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5804       const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5805       valid_refs[frame - 1] =
5806           x->tpl_keep_ref_frame[frame] ||
5807           !prune_ref_by_selective_ref_frame(
5808               cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5809     }
5810     av1_zero(inter_cost_info_from_tpl);
5811     get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5812                               &inter_cost_info_from_tpl);
5813   }
5814 
5815   const int do_pruning =
5816       (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5817   if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5818       cpi->oxcf.algo_cfg.enable_tpl_model)
5819     calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5820                                  &intra_cost);
5821 #endif  // !CONFIG_REALTIME_ONLY
5822 
5823   // Initialize best mode stats for winner mode processing.
5824   const int max_winner_mode_count =
5825       winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5826   zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5827   x->winner_mode_count = 0;
5828   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5829                           NULL, bsize, best_rd_so_far,
5830                           sf->winner_mode_sf.multi_winner_mode_type, 0);
5831 
5832   int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5833   if (sf->inter_sf.prune_inter_modes_if_skippable) {
5834     // Higher multiplication factor values for lower quantizers.
5835     mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5836   }
5837 
5838   // Initialize arguments for mode loop speed features
5839   InterModeSFArgs sf_args = { &args.skip_motion_mode,
5840                               &mode_skip_mask,
5841                               &search_state,
5842                               skip_ref_frame_mask,
5843                               0,
5844                               mode_thresh_mul_fact,
5845                               0,
5846                               0 };
5847   int64_t best_inter_yrd = INT64_MAX;
5848 
5849   // This is the main loop of this function. It loops over all possible inter
5850   // modes and calls handle_inter_mode() to compute the RD for each.
5851   // Here midx is just an iterator index that should not be used by itself
5852   // except to keep track of the number of modes searched. It should be used
5853   // with av1_default_mode_order to get the enum that defines the mode, which
5854   // can be used with av1_mode_defs to get the prediction mode and the ref
5855   // frames.
5856   // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5857   // good speedup for real time case. If we decide to use compound mode in real
5858   // time, maybe we can modify av1_default_mode_order table.
5859   THR_MODES mode_start = THR_INTER_MODE_START;
5860   THR_MODES mode_end = THR_INTER_MODE_END;
5861   const CurrentFrame *const current_frame = &cm->current_frame;
5862   if (current_frame->reference_mode == SINGLE_REFERENCE) {
5863     mode_start = SINGLE_REF_MODE_START;
5864     mode_end = SINGLE_REF_MODE_END;
5865   }
5866 
5867   for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5868     // Get the actual prediction mode we are trying in this iteration
5869     const THR_MODES mode_enum = av1_default_mode_order[midx];
5870     const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5871     const PREDICTION_MODE this_mode = mode_def->mode;
5872     const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5873 
5874     const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5875     const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5876     const int is_single_pred =
5877         ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5878     const int comp_pred = second_ref_frame > INTRA_FRAME;
5879 
5880     init_mbmi(mbmi, this_mode, ref_frames, cm);
5881 
5882     txfm_info->skip_txfm = 0;
5883     sf_args.num_single_modes_processed += is_single_pred;
5884     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5885 #if CONFIG_COLLECT_COMPONENT_TIMING
5886     start_timing(cpi, skip_inter_mode_time);
5887 #endif
5888     // Apply speed features to decide if this inter mode can be skipped
5889     const int is_skip_inter_mode = skip_inter_mode(
5890         cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5891 #if CONFIG_COLLECT_COMPONENT_TIMING
5892     end_timing(cpi, skip_inter_mode_time);
5893 #endif
5894     if (is_skip_inter_mode) continue;
5895 
5896     // Select prediction reference frames.
5897     for (i = 0; i < num_planes; i++) {
5898       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5899       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5900     }
5901 
5902     mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5903     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5904     mbmi->filter_intra_mode_info.use_filter_intra = 0;
5905     mbmi->ref_mv_idx = 0;
5906 
5907     const int64_t ref_best_rd = search_state.best_rd;
5908     RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5909     av1_init_rd_stats(&rd_stats);
5910 
5911     const int ref_frame_cost = comp_pred
5912                                    ? ref_costs_comp[ref_frame][second_ref_frame]
5913                                    : ref_costs_single[ref_frame];
5914     const int compmode_cost =
5915         is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
5916     const int real_compmode_cost =
5917         cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
5918             ? compmode_cost
5919             : 0;
5920     // Point to variables that are maintained between loop iterations
5921     args.single_newmv = search_state.single_newmv;
5922     args.single_newmv_rate = search_state.single_newmv_rate;
5923     args.single_newmv_valid = search_state.single_newmv_valid;
5924     args.single_comp_cost = real_compmode_cost;
5925     args.ref_frame_cost = ref_frame_cost;
5926     args.best_pred_sse = search_state.best_pred_sse;
5927 
5928     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
5929                            search_state.best_skip_rd[1] };
5930     int64_t this_yrd = INT64_MAX;
5931 #if CONFIG_COLLECT_COMPONENT_TIMING
5932     start_timing(cpi, handle_inter_mode_time);
5933 #endif
5934     int64_t this_rd = handle_inter_mode(
5935         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
5936         ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
5937         inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
5938         &this_yrd);
5939 #if CONFIG_COLLECT_COMPONENT_TIMING
5940     end_timing(cpi, handle_inter_mode_time);
5941 #endif
5942     if (current_frame->reference_mode != SINGLE_REFERENCE) {
5943       if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
5944           is_inter_singleref_mode(this_mode)) {
5945         collect_single_states(x, &search_state, mbmi);
5946       }
5947 
5948       if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
5949           is_inter_singleref_mode(this_mode))
5950         update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
5951     }
5952 
5953     if (this_rd == INT64_MAX) continue;
5954 
5955     if (mbmi->skip_txfm) {
5956       rd_stats_y.rate = 0;
5957       rd_stats_uv.rate = 0;
5958     }
5959 
5960     if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
5961         this_rd < ref_frame_rd[ref_frame]) {
5962       ref_frame_rd[ref_frame] = this_rd;
5963     }
5964 
5965     // Did this mode help, i.e., is it the new best mode
5966     if (this_rd < search_state.best_rd) {
5967       assert(IMPLIES(comp_pred,
5968                      cm->current_frame.reference_mode != SINGLE_REFERENCE));
5969       search_state.best_pred_sse = x->pred_sse[ref_frame];
5970       best_inter_yrd = this_yrd;
5971       update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5972                           &rd_stats_uv, mode_enum, x, do_tx_search);
5973       if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
5974       search_state.best_skip_rd[1] = skip_rd[1];
5975     }
5976     if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
5977       // Add this mode to motion mode candidate list for motion mode search
5978       // if using motion_mode_for_winner_cand speed feature
5979       handle_winner_cand(mbmi, &best_motion_mode_cands,
5980                          max_winner_motion_mode_cand, this_rd,
5981                          &motion_mode_cand, args.skip_motion_mode);
5982     }
5983 
5984     /* keep record of best compound/single-only prediction */
5985     record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
5986                          x->rdmult, &search_state, compmode_cost);
5987   }
5988 
5989 #if CONFIG_COLLECT_COMPONENT_TIMING
5990   start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5991 #endif
5992   if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
5993     // For the single ref winner candidates, evaluate other motion modes (non
5994     // simple translation).
5995     evaluate_motion_mode_for_winner_candidates(
5996         cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
5997         &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
5998         &search_state, &best_inter_yrd);
5999   }
6000 #if CONFIG_COLLECT_COMPONENT_TIMING
6001   end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6002 #endif
6003 
6004 #if CONFIG_COLLECT_COMPONENT_TIMING
6005   start_timing(cpi, do_tx_search_time);
6006 #endif
6007   if (do_tx_search != 1) {
6008     // A full tx search has not yet been done, do tx search for
6009     // top mode candidates
6010     tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6011                                     yv12_mb, mi_row, mi_col, &search_state,
6012                                     rd_cost, ctx, &best_inter_yrd);
6013   }
6014 #if CONFIG_COLLECT_COMPONENT_TIMING
6015   end_timing(cpi, do_tx_search_time);
6016 #endif
6017 
6018 #if CONFIG_COLLECT_COMPONENT_TIMING
6019   start_timing(cpi, handle_intra_mode_time);
6020 #endif
6021   // Gate intra mode evaluation if best of inter is skip except when source
6022   // variance is extremely low and also based on max intra bsize.
6023   skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6024                                  intra_cost);
6025 
6026   const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6027   search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6028                                    &sf_args, intra_ref_frame_cost,
6029                                    best_inter_yrd);
6030 #if CONFIG_COLLECT_COMPONENT_TIMING
6031   end_timing(cpi, handle_intra_mode_time);
6032 #endif
6033 
6034 #if CONFIG_COLLECT_COMPONENT_TIMING
6035   start_timing(cpi, refine_winner_mode_tx_time);
6036 #endif
6037   int winner_mode_count =
6038       sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6039   // In effect only when fast tx search speed features are enabled.
6040   refine_winner_mode_tx(
6041       cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6042       &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6043       search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6044 #if CONFIG_COLLECT_COMPONENT_TIMING
6045   end_timing(cpi, refine_winner_mode_tx_time);
6046 #endif
6047 
6048   // Initialize default mode evaluation params
6049   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6050 
6051   // Only try palette mode when the best mode so far is an intra mode.
6052   const int try_palette =
6053       cpi->oxcf.tool_cfg.enable_palette &&
6054       av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6055       !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6056   RD_STATS this_rd_cost;
6057   int this_skippable = 0;
6058   if (try_palette) {
6059 #if CONFIG_COLLECT_COMPONENT_TIMING
6060     start_timing(cpi, av1_search_palette_mode_time);
6061 #endif
6062     this_skippable = av1_search_palette_mode(
6063         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6064         ctx, &this_rd_cost, search_state.best_rd);
6065 #if CONFIG_COLLECT_COMPONENT_TIMING
6066     end_timing(cpi, av1_search_palette_mode_time);
6067 #endif
6068     if (this_rd_cost.rdcost < search_state.best_rd) {
6069       search_state.best_mode_index = THR_DC;
6070       mbmi->mv[0].as_int = 0;
6071       rd_cost->rate = this_rd_cost.rate;
6072       rd_cost->dist = this_rd_cost.dist;
6073       rd_cost->rdcost = this_rd_cost.rdcost;
6074       search_state.best_rd = rd_cost->rdcost;
6075       search_state.best_mbmode = *mbmi;
6076       search_state.best_skip2 = 0;
6077       search_state.best_mode_skippable = this_skippable;
6078       memcpy(ctx->blk_skip, txfm_info->blk_skip,
6079              sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6080       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6081     }
6082   }
6083 
6084   search_state.best_mbmode.skip_mode = 0;
6085   if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6086       is_comp_ref_allowed(bsize)) {
6087     const struct segmentation *const seg = &cm->seg;
6088     unsigned char segment_id = mbmi->segment_id;
6089     if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6090       rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6091     }
6092   }
6093 
6094   // Make sure that the ref_mv_idx is only nonzero when we're
6095   // using a mode which can support ref_mv_idx
6096   if (search_state.best_mbmode.ref_mv_idx != 0 &&
6097       !(search_state.best_mbmode.mode == NEWMV ||
6098         search_state.best_mbmode.mode == NEW_NEWMV ||
6099         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6100     search_state.best_mbmode.ref_mv_idx = 0;
6101   }
6102 
6103   if (search_state.best_mode_index == THR_INVALID ||
6104       search_state.best_rd >= best_rd_so_far) {
6105     rd_cost->rate = INT_MAX;
6106     rd_cost->rdcost = INT64_MAX;
6107     return;
6108   }
6109 
6110   const InterpFilter interp_filter = features->interp_filter;
6111   assert((interp_filter == SWITCHABLE) ||
6112          (interp_filter ==
6113           search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6114          !is_inter_block(&search_state.best_mbmode));
6115   assert((interp_filter == SWITCHABLE) ||
6116          (interp_filter ==
6117           search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6118          !is_inter_block(&search_state.best_mbmode));
6119 
6120   if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6121     av1_update_rd_thresh_fact(
6122         cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6123         search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6124   }
6125 
6126   // macroblock modes
6127   *mbmi = search_state.best_mbmode;
6128   txfm_info->skip_txfm |= search_state.best_skip2;
6129 
6130   // Note: this section is needed since the mode may have been forced to
6131   // GLOBALMV by the all-zero mode handling of ref-mv.
6132   if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6133     // Correct the interp filters for GLOBALMV
6134     if (is_nontrans_global_motion(xd, xd->mi[0])) {
6135       int_interpfilters filters =
6136           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6137       assert(mbmi->interp_filters.as_int == filters.as_int);
6138       (void)filters;
6139     }
6140   }
6141 
6142   txfm_info->skip_txfm |= search_state.best_mode_skippable;
6143 
6144   assert(search_state.best_mode_index != THR_INVALID);
6145 
6146 #if CONFIG_INTERNAL_STATS
6147   store_coding_context(x, ctx, search_state.best_mode_index,
6148                        search_state.best_mode_skippable);
6149 #else
6150   store_coding_context(x, ctx, search_state.best_mode_skippable);
6151 #endif  // CONFIG_INTERNAL_STATS
6152 
6153   if (mbmi->palette_mode_info.palette_size[1] > 0) {
6154     assert(try_palette);
6155     av1_restore_uv_color_map(cpi, x);
6156   }
6157 }
6158 
// Mode decision for a block in a segment with SEG_LVL_SKIP active: no
// residual is coded (skip_txfm is forced on) and the mode is fixed to
// single-reference GLOBALMV with the global-motion vector, so the only
// search performed here is over the switchable interpolation filters.
// On return, *rd_cost holds the rate/distortion of the forced mode, or
// rate = INT_MAX / rdcost = INT64_MAX if it is not better than
// best_rd_so_far.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Compound prediction is never used on skip segs.
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // No residual is coded, so distortion stays zero throughout.
  const int64_t distortion2 = 0;
  // NOTE(review): mi_row/mi_col are actually used below (gm_get_motion_vector),
  // so these casts are redundant leftovers.
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  // Fill ref_costs_single/ref_costs_comp with the signaling cost of each
  // reference-frame choice for this segment.
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // This path is only valid when the segment forces skip.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the block to GLOBALMV / simple translation with no palette or
  // filter-intra; pick the reference either from the segment feature or
  // default to LAST_FRAME.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The MV is derived from the frame-level global motion of the chosen ref.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  // Gather warp-motion projection samples so later stages see a valid
  // num_proj_ref even though SIMPLE_TRANSLATION is used here.
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    // With switchable filters the only RD decision left is the filter with
    // the cheapest signaling cost (distortion is zero for a skip block).
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Report failure when the forced mode does not beat the caller's bound.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  // Update the adaptive RD-threshold factors for the GLOBALMV mode slot.
  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6290 
/*!\cond */
// Per-call context shared by the calc_target_weighted_pred_{above,left}
// neighbour callbacks invoked via foreach_overlappable_nb_{above,left}.
struct calc_target_weighted_pred_ctxt {
  // Destination wsrc/mask accumulation buffers.
  const OBMCBuffer *obmc_buffer;
  // Neighbour (above or left) prediction samples; reinterpreted with
  // CONVERT_TO_SHORTPTR by the callbacks when the buffer is high bit depth.
  const uint8_t *tmp;
  // Stride of 'tmp' in samples.
  int tmp_stride;
  // Overlap extent in samples (row count for above, column count for left).
  int overlap;
};
/*!\endcond */
6299 
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6300 static INLINE void calc_target_weighted_pred_above(
6301     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6302     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6303   (void)nb_mi;
6304   (void)num_planes;
6305   (void)rel_mi_row;
6306   (void)dir;
6307 
6308   struct calc_target_weighted_pred_ctxt *ctxt =
6309       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6310 
6311   const int bw = xd->width << MI_SIZE_LOG2;
6312   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6313 
6314   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6315   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6316   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6317   const int is_hbd = is_cur_buf_hbd(xd);
6318 
6319   if (!is_hbd) {
6320     for (int row = 0; row < ctxt->overlap; ++row) {
6321       const uint8_t m0 = mask1d[row];
6322       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6323       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6324         wsrc[col] = m1 * tmp[col];
6325         mask[col] = m0;
6326       }
6327       wsrc += bw;
6328       mask += bw;
6329       tmp += ctxt->tmp_stride;
6330     }
6331   } else {
6332     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6333 
6334     for (int row = 0; row < ctxt->overlap; ++row) {
6335       const uint8_t m0 = mask1d[row];
6336       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6337       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6338         wsrc[col] = m1 * tmp16[col];
6339         mask[col] = m0;
6340       }
6341       wsrc += bw;
6342       mask += bw;
6343       tmp16 += ctxt->tmp_stride;
6344     }
6345   }
6346 }
6347 
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6348 static INLINE void calc_target_weighted_pred_left(
6349     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6350     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6351   (void)nb_mi;
6352   (void)num_planes;
6353   (void)rel_mi_col;
6354   (void)dir;
6355 
6356   struct calc_target_weighted_pred_ctxt *ctxt =
6357       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6358 
6359   const int bw = xd->width << MI_SIZE_LOG2;
6360   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6361 
6362   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6363   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6364   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6365   const int is_hbd = is_cur_buf_hbd(xd);
6366 
6367   if (!is_hbd) {
6368     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6369       for (int col = 0; col < ctxt->overlap; ++col) {
6370         const uint8_t m0 = mask1d[col];
6371         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6372         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6373                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6374         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6375       }
6376       wsrc += bw;
6377       mask += bw;
6378       tmp += ctxt->tmp_stride;
6379     }
6380   } else {
6381     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6382 
6383     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6384       for (int col = 0; col < ctxt->overlap; ++col) {
6385         const uint8_t m0 = mask1d[col];
6386         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6387         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6388                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6389         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6390       }
6391       wsrc += bw;
6392       mask += bw;
6393       tmp16 += ctxt->tmp_stride;
6394     }
6395   }
6396 }
6397 
6398 // This function has a structure similar to av1_build_obmc_inter_prediction
6399 //
6400 // The OBMC predictor is computed as:
6401 //
6402 //  PObmc(x,y) =
6403 //    AOM_BLEND_A64(Mh(x),
6404 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6405 //                  PLeft(x, y))
6406 //
6407 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6408 // rounding, this can be written as:
6409 //
6410 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6411 //    Mh(x) * Mv(y) * P(x,y) +
6412 //      Mh(x) * Cv(y) * Pabove(x,y) +
6413 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6414 //
6415 // Where :
6416 //
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6419 //
6420 // This function computes 'wsrc' and 'mask' as:
6421 //
6422 //  wsrc(x, y) =
6423 //    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6424 //      Mh(x) * Cv(y) * Pabove(x,y) +
6425 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6426 //
6427 //  mask(x, y) = Mh(x) * Mv(y)
6428 //
6429 // These can then be used to efficiently approximate the error for any
6430 // predictor P in the context of the provided neighbouring predictors by
6431 // computing:
6432 //
6433 //  error(x, y) =
6434 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6435 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6436 static AOM_INLINE void calc_target_weighted_pred(
6437     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6438     const uint8_t *above, int above_stride, const uint8_t *left,
6439     int left_stride) {
6440   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6441   const int bw = xd->width << MI_SIZE_LOG2;
6442   const int bh = xd->height << MI_SIZE_LOG2;
6443   const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6444   int32_t *mask_buf = obmc_buffer->mask;
6445   int32_t *wsrc_buf = obmc_buffer->wsrc;
6446 
6447   const int is_hbd = is_cur_buf_hbd(xd);
6448   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6449 
6450   // plane 0 should not be sub-sampled
6451   assert(xd->plane[0].subsampling_x == 0);
6452   assert(xd->plane[0].subsampling_y == 0);
6453 
6454   av1_zero_array(wsrc_buf, bw * bh);
6455   for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6456 
6457   // handle above row
6458   if (xd->up_available) {
6459     const int overlap =
6460         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6461     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6462                                                    above_stride, overlap };
6463     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6464                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
6465                                   calc_target_weighted_pred_above, &ctxt);
6466   }
6467 
6468   for (int i = 0; i < bw * bh; ++i) {
6469     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6470     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6471   }
6472 
6473   // handle left column
6474   if (xd->left_available) {
6475     const int overlap =
6476         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6477     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6478                                                    left_stride, overlap };
6479     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6480                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
6481                                  calc_target_weighted_pred_left, &ctxt);
6482   }
6483 
6484   if (!is_hbd) {
6485     const uint8_t *src = x->plane[0].src.buf;
6486 
6487     for (int row = 0; row < bh; ++row) {
6488       for (int col = 0; col < bw; ++col) {
6489         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6490       }
6491       wsrc_buf += bw;
6492       src += x->plane[0].src.stride;
6493     }
6494   } else {
6495     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6496 
6497     for (int row = 0; row < bh; ++row) {
6498       for (int col = 0; col < bw; ++col) {
6499         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6500       }
6501       wsrc_buf += bw;
6502       src += x->plane[0].src.stride;
6503     }
6504   }
6505 }
6506