1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44 
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73 
74 #define LAST_NEW_MV_INDEX 6
75 
76 // Mode threshold multiplication factor table for prune_inter_modes_if_skippable.
77 // The values are kept in Q12 format and the equation used to derive them is
78 // (2.5 - ((float)x->qindex / MAXQ) * 1.5).
79 #define MODE_THRESH_QBITS 12
80 static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
81   10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
82   9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
83   9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
84   9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
85   9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
86   8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
87   8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
88   8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
89   8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
90   7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
91   7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
92   7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
93   7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
94   6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
95   6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
96   6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
97   5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
98   5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
99   5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
100   5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
101   4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
102   4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
103   4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
104   4144,  4120,  4096
105 };
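
// Illustrative sketch (not part of libaom): reproducing an entry of the table
// above from the documented Q12 equation (2.5 - ((float)qindex / MAXQ) * 1.5).
// It assumes MAXQ == 255 and MODE_THRESH_QBITS == 12 as used in this file; the
// helper name is hypothetical.
static int mode_threshold_mul_factor_q12(int qindex) {
  const double factor = 2.5 - ((double)qindex / 255.0) * 1.5;
  // Scale to Q12 fixed point and round to the nearest integer.
  return (int)(factor * (1 << MODE_THRESH_QBITS) + 0.5);
}
// For example, qindex 0 gives 2.5 * 4096 = 10240 (the first entry) and
// qindex 255 gives 1.0 * 4096 = 4096 (the last entry).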
106 
107 static const THR_MODES av1_default_mode_order[MAX_MODES] = {
108   THR_NEARESTMV,
109   THR_NEARESTL2,
110   THR_NEARESTL3,
111   THR_NEARESTB,
112   THR_NEARESTA2,
113   THR_NEARESTA,
114   THR_NEARESTG,
115 
116   THR_NEWMV,
117   THR_NEWL2,
118   THR_NEWL3,
119   THR_NEWB,
120   THR_NEWA2,
121   THR_NEWA,
122   THR_NEWG,
123 
124   THR_NEARMV,
125   THR_NEARL2,
126   THR_NEARL3,
127   THR_NEARB,
128   THR_NEARA2,
129   THR_NEARA,
130   THR_NEARG,
131 
132   THR_GLOBALMV,
133   THR_GLOBALL2,
134   THR_GLOBALL3,
135   THR_GLOBALB,
136   THR_GLOBALA2,
137   THR_GLOBALA,
138   THR_GLOBALG,
139 
140   THR_COMP_NEAREST_NEARESTLA,
141   THR_COMP_NEAREST_NEARESTL2A,
142   THR_COMP_NEAREST_NEARESTL3A,
143   THR_COMP_NEAREST_NEARESTGA,
144   THR_COMP_NEAREST_NEARESTLB,
145   THR_COMP_NEAREST_NEARESTL2B,
146   THR_COMP_NEAREST_NEARESTL3B,
147   THR_COMP_NEAREST_NEARESTGB,
148   THR_COMP_NEAREST_NEARESTLA2,
149   THR_COMP_NEAREST_NEARESTL2A2,
150   THR_COMP_NEAREST_NEARESTL3A2,
151   THR_COMP_NEAREST_NEARESTGA2,
152   THR_COMP_NEAREST_NEARESTLL2,
153   THR_COMP_NEAREST_NEARESTLL3,
154   THR_COMP_NEAREST_NEARESTLG,
155   THR_COMP_NEAREST_NEARESTBA,
156 
157   THR_COMP_NEAR_NEARLB,
158   THR_COMP_NEW_NEWLB,
159   THR_COMP_NEW_NEARESTLB,
160   THR_COMP_NEAREST_NEWLB,
161   THR_COMP_NEW_NEARLB,
162   THR_COMP_NEAR_NEWLB,
163   THR_COMP_GLOBAL_GLOBALLB,
164 
165   THR_COMP_NEAR_NEARLA,
166   THR_COMP_NEW_NEWLA,
167   THR_COMP_NEW_NEARESTLA,
168   THR_COMP_NEAREST_NEWLA,
169   THR_COMP_NEW_NEARLA,
170   THR_COMP_NEAR_NEWLA,
171   THR_COMP_GLOBAL_GLOBALLA,
172 
173   THR_COMP_NEAR_NEARL2A,
174   THR_COMP_NEW_NEWL2A,
175   THR_COMP_NEW_NEARESTL2A,
176   THR_COMP_NEAREST_NEWL2A,
177   THR_COMP_NEW_NEARL2A,
178   THR_COMP_NEAR_NEWL2A,
179   THR_COMP_GLOBAL_GLOBALL2A,
180 
181   THR_COMP_NEAR_NEARL3A,
182   THR_COMP_NEW_NEWL3A,
183   THR_COMP_NEW_NEARESTL3A,
184   THR_COMP_NEAREST_NEWL3A,
185   THR_COMP_NEW_NEARL3A,
186   THR_COMP_NEAR_NEWL3A,
187   THR_COMP_GLOBAL_GLOBALL3A,
188 
189   THR_COMP_NEAR_NEARGA,
190   THR_COMP_NEW_NEWGA,
191   THR_COMP_NEW_NEARESTGA,
192   THR_COMP_NEAREST_NEWGA,
193   THR_COMP_NEW_NEARGA,
194   THR_COMP_NEAR_NEWGA,
195   THR_COMP_GLOBAL_GLOBALGA,
196 
197   THR_COMP_NEAR_NEARL2B,
198   THR_COMP_NEW_NEWL2B,
199   THR_COMP_NEW_NEARESTL2B,
200   THR_COMP_NEAREST_NEWL2B,
201   THR_COMP_NEW_NEARL2B,
202   THR_COMP_NEAR_NEWL2B,
203   THR_COMP_GLOBAL_GLOBALL2B,
204 
205   THR_COMP_NEAR_NEARL3B,
206   THR_COMP_NEW_NEWL3B,
207   THR_COMP_NEW_NEARESTL3B,
208   THR_COMP_NEAREST_NEWL3B,
209   THR_COMP_NEW_NEARL3B,
210   THR_COMP_NEAR_NEWL3B,
211   THR_COMP_GLOBAL_GLOBALL3B,
212 
213   THR_COMP_NEAR_NEARGB,
214   THR_COMP_NEW_NEWGB,
215   THR_COMP_NEW_NEARESTGB,
216   THR_COMP_NEAREST_NEWGB,
217   THR_COMP_NEW_NEARGB,
218   THR_COMP_NEAR_NEWGB,
219   THR_COMP_GLOBAL_GLOBALGB,
220 
221   THR_COMP_NEAR_NEARLA2,
222   THR_COMP_NEW_NEWLA2,
223   THR_COMP_NEW_NEARESTLA2,
224   THR_COMP_NEAREST_NEWLA2,
225   THR_COMP_NEW_NEARLA2,
226   THR_COMP_NEAR_NEWLA2,
227   THR_COMP_GLOBAL_GLOBALLA2,
228 
229   THR_COMP_NEAR_NEARL2A2,
230   THR_COMP_NEW_NEWL2A2,
231   THR_COMP_NEW_NEARESTL2A2,
232   THR_COMP_NEAREST_NEWL2A2,
233   THR_COMP_NEW_NEARL2A2,
234   THR_COMP_NEAR_NEWL2A2,
235   THR_COMP_GLOBAL_GLOBALL2A2,
236 
237   THR_COMP_NEAR_NEARL3A2,
238   THR_COMP_NEW_NEWL3A2,
239   THR_COMP_NEW_NEARESTL3A2,
240   THR_COMP_NEAREST_NEWL3A2,
241   THR_COMP_NEW_NEARL3A2,
242   THR_COMP_NEAR_NEWL3A2,
243   THR_COMP_GLOBAL_GLOBALL3A2,
244 
245   THR_COMP_NEAR_NEARGA2,
246   THR_COMP_NEW_NEWGA2,
247   THR_COMP_NEW_NEARESTGA2,
248   THR_COMP_NEAREST_NEWGA2,
249   THR_COMP_NEW_NEARGA2,
250   THR_COMP_NEAR_NEWGA2,
251   THR_COMP_GLOBAL_GLOBALGA2,
252 
253   THR_COMP_NEAR_NEARLL2,
254   THR_COMP_NEW_NEWLL2,
255   THR_COMP_NEW_NEARESTLL2,
256   THR_COMP_NEAREST_NEWLL2,
257   THR_COMP_NEW_NEARLL2,
258   THR_COMP_NEAR_NEWLL2,
259   THR_COMP_GLOBAL_GLOBALLL2,
260 
261   THR_COMP_NEAR_NEARLL3,
262   THR_COMP_NEW_NEWLL3,
263   THR_COMP_NEW_NEARESTLL3,
264   THR_COMP_NEAREST_NEWLL3,
265   THR_COMP_NEW_NEARLL3,
266   THR_COMP_NEAR_NEWLL3,
267   THR_COMP_GLOBAL_GLOBALLL3,
268 
269   THR_COMP_NEAR_NEARLG,
270   THR_COMP_NEW_NEWLG,
271   THR_COMP_NEW_NEARESTLG,
272   THR_COMP_NEAREST_NEWLG,
273   THR_COMP_NEW_NEARLG,
274   THR_COMP_NEAR_NEWLG,
275   THR_COMP_GLOBAL_GLOBALLG,
276 
277   THR_COMP_NEAR_NEARBA,
278   THR_COMP_NEW_NEWBA,
279   THR_COMP_NEW_NEARESTBA,
280   THR_COMP_NEAREST_NEWBA,
281   THR_COMP_NEW_NEARBA,
282   THR_COMP_NEAR_NEWBA,
283   THR_COMP_GLOBAL_GLOBALBA,
284 
285   THR_DC,
286   THR_PAETH,
287   THR_SMOOTH,
288   THR_SMOOTH_V,
289   THR_SMOOTH_H,
290   THR_H_PRED,
291   THR_V_PRED,
292   THR_D135_PRED,
293   THR_D203_PRED,
294   THR_D157_PRED,
295   THR_D67_PRED,
296   THR_D113_PRED,
297   THR_D45_PRED,
298 };
299 
300 /*!\cond */
301 typedef struct SingleInterModeState {
302   int64_t rd;
303   MV_REFERENCE_FRAME ref_frame;
304   int valid;
305 } SingleInterModeState;
306 
307 typedef struct InterModeSearchState {
308   int64_t best_rd;
309   int64_t best_skip_rd[2];
310   MB_MODE_INFO best_mbmode;
311   int best_rate_y;
312   int best_rate_uv;
313   int best_mode_skippable;
314   int best_skip2;
315   THR_MODES best_mode_index;
316   int num_available_refs;
317   int64_t dist_refs[REF_FRAMES];
318   int dist_order_refs[REF_FRAMES];
319   int64_t mode_threshold[MAX_MODES];
320   int64_t best_intra_rd;
321   unsigned int best_pred_sse;
322 
323   /*!
324    * \brief Keep track of best intra rd for use in compound mode.
325    */
326   int64_t best_pred_rd[REFERENCE_MODES];
327   // Save a set of single_newmv for each checked ref_mv.
328   int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
329   int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
330   int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
331   int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
332   // The rd of simple translation in single inter modes
333   int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
334   int64_t best_single_rd[REF_FRAMES];
335   PREDICTION_MODE best_single_mode[REF_FRAMES];
336 
337   // Single search results by [directions][modes][reference frames]
338   SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
339   int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
340   SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
341                                             [FWD_REFS];
342   int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
343   MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
344   IntraModeSearchState intra_search_state;
345   RD_STATS best_y_rdcost;
346 } InterModeSearchState;
347 /*!\endcond */
348 
349 void av1_inter_mode_data_init(TileDataEnc *tile_data) {
350   for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
351     InterModeRdModel *md = &tile_data->inter_mode_rd_models[i];
352     md->ready = 0;
353     md->num = 0;
354     md->dist_sum = 0;
355     md->ld_sum = 0;
356     md->sse_sum = 0;
357     md->sse_sse_sum = 0;
358     md->sse_ld_sum = 0;
359   }
360 }
361 
362 static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
363                              int64_t sse, int *est_residue_cost,
364                              int64_t *est_dist) {
365   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
366   if (md->ready) {
367     if (sse < md->dist_mean) {
368       *est_residue_cost = 0;
369       *est_dist = sse;
370     } else {
371       *est_dist = (int64_t)round(md->dist_mean);
372       const double est_ld = md->a * sse + md->b;
373       // Clamp estimated rate cost by INT_MAX / 2.
374       // TODO(angiebird@google.com): find better solution than clamping.
375       if (fabs(est_ld) < 1e-2) {
376         *est_residue_cost = INT_MAX / 2;
377       } else {
378         double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
379         if (est_residue_cost_dbl < 0) {
380           *est_residue_cost = 0;
381         } else {
382           *est_residue_cost =
383               (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
384         }
385       }
386       if (*est_residue_cost <= 0) {
387         *est_residue_cost = 0;
388         *est_dist = sse;
389       }
390     }
391     return 1;
392   }
393   return 0;
394 }
395 
396 void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
397   for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
398     const int block_idx = inter_mode_data_block_idx(bsize);
399     InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
400     if (block_idx == -1) continue;
401     if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
402       continue;
403     } else {
404       if (md->ready == 0) {
405         md->dist_mean = md->dist_sum / md->num;
406         md->ld_mean = md->ld_sum / md->num;
407         md->sse_mean = md->sse_sum / md->num;
408         md->sse_sse_mean = md->sse_sse_sum / md->num;
409         md->sse_ld_mean = md->sse_ld_sum / md->num;
410       } else {
411         const double factor = 3;
412         md->dist_mean =
413             (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
414         md->ld_mean =
415             (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
416         md->sse_mean =
417             (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
418         md->sse_sse_mean =
419             (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
420             (factor + 1);
421         md->sse_ld_mean =
422             (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
423             (factor + 1);
424       }
425 
426       const double my = md->ld_mean;
427       const double mx = md->sse_mean;
428       const double dx = sqrt(md->sse_sse_mean);
429       const double dxy = md->sse_ld_mean;
430 
431       md->a = (dxy - mx * my) / (dx * dx - mx * mx);
432       md->b = my - md->a * mx;
433       md->ready = 1;
434 
435       md->num = 0;
436       md->dist_sum = 0;
437       md->ld_sum = 0;
438       md->sse_sum = 0;
439       md->sse_sse_sum = 0;
440       md->sse_ld_sum = 0;
441     }
442     (void)rdmult;
443   }
444 }
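
// Illustrative sketch (not part of libaom): the fit above is a closed-form
// simple linear regression of the distortion-per-rate slope ("ld") against
// sse, computed from the running raw moments stored in InterModeRdModel.
// The helper name and free-standing form are hypothetical.
static void fit_ld_linear_model(double sse_mean, double ld_mean,
                                double sse_sse_mean, double sse_ld_mean,
                                double *a, double *b) {
  // Slope: covariance(sse, ld) / variance(sse), written with raw moments.
  *a = (sse_ld_mean - sse_mean * ld_mean) /
       (sse_sse_mean - sse_mean * sse_mean);
  // Intercept: makes the fitted line pass through the mean point.
  *b = ld_mean - (*a) * sse_mean;
}
// get_est_rate_dist() then evaluates the fitted line as est_ld = a * sse + b
// and estimates the residue rate as (sse - dist_mean) / est_ld.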
445 
446 static inline void inter_mode_data_push(TileDataEnc *tile_data,
447                                         BLOCK_SIZE bsize, int64_t sse,
448                                         int64_t dist, int residue_cost) {
449   if (residue_cost == 0 || sse == dist) return;
450   const int block_idx = inter_mode_data_block_idx(bsize);
451   if (block_idx == -1) return;
452   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454     const double ld = (sse - dist) * 1. / residue_cost;
455     ++rd_model->num;
456     rd_model->dist_sum += dist;
457     rd_model->ld_sum += ld;
458     rd_model->sse_sum += sse;
459     rd_model->sse_sse_sum += (double)sse * (double)sse;
460     rd_model->sse_ld_sum += sse * ld;
461   }
462 }
463 
464 static inline void inter_modes_info_push(InterModesInfo *inter_modes_info,
465                                          int mode_rate, int64_t sse, int64_t rd,
466                                          RD_STATS *rd_cost, RD_STATS *rd_cost_y,
467                                          RD_STATS *rd_cost_uv,
468                                          const MB_MODE_INFO *mbmi) {
469   const int num = inter_modes_info->num;
470   assert(num < MAX_INTER_MODES);
471   inter_modes_info->mbmi_arr[num] = *mbmi;
472   inter_modes_info->mode_rate_arr[num] = mode_rate;
473   inter_modes_info->sse_arr[num] = sse;
474   inter_modes_info->est_rd_arr[num] = rd;
475   inter_modes_info->rd_cost_arr[num] = *rd_cost;
476   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
477   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
478   ++inter_modes_info->num;
479 }
480 
481 static int compare_rd_idx_pair(const void *a, const void *b) {
482   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
483     // To avoid inconsistency in qsort() ordering when two elements are equal,
484     // use idx as the tie breaker. Refer to aomedia:2928.
485     if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
486       return 0;
487     else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
488       return 1;
489     else
490       return -1;
491   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
492     return 1;
493   } else {
494     return -1;
495   }
496 }
497 
498 static inline void inter_modes_info_sort(const InterModesInfo *inter_modes_info,
499                                          RdIdxPair *rd_idx_pair_arr) {
500   if (inter_modes_info->num == 0) {
501     return;
502   }
503   for (int i = 0; i < inter_modes_info->num; ++i) {
504     rd_idx_pair_arr[i].idx = i;
505     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
506   }
507   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
508         compare_rd_idx_pair);
509 }
510 
511 // Similar to get_horver_correlation, but also takes into account the first
512 // row/column when computing horizontal/vertical correlation.
513 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
514                                        int width, int height, float *hcorr,
515                                        float *vcorr) {
516   // The following notation is used:
517   // x - current pixel
518   // y - left neighbor pixel
519   // z - top neighbor pixel
520   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
521   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
522   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
523 
524   // First, process horizontal correlation on just the first row
525   x_sum += diff[0];
526   x2_sum += diff[0] * diff[0];
527   x_firstrow += diff[0];
528   x2_firstrow += diff[0] * diff[0];
529   for (int j = 1; j < width; ++j) {
530     const int16_t x = diff[j];
531     const int16_t y = diff[j - 1];
532     x_sum += x;
533     x_firstrow += x;
534     x2_sum += x * x;
535     x2_firstrow += x * x;
536     xy_sum += x * y;
537   }
538 
539   // Process vertical correlation in the first column
540   x_firstcol += diff[0];
541   x2_firstcol += diff[0] * diff[0];
542   for (int i = 1; i < height; ++i) {
543     const int16_t x = diff[i * stride];
544     const int16_t z = diff[(i - 1) * stride];
545     x_sum += x;
546     x_firstcol += x;
547     x2_sum += x * x;
548     x2_firstcol += x * x;
549     xz_sum += x * z;
550   }
551 
552   // Now process horizontal and vertical correlation through the rest of the unit
553   for (int i = 1; i < height; ++i) {
554     for (int j = 1; j < width; ++j) {
555       const int16_t x = diff[i * stride + j];
556       const int16_t y = diff[i * stride + j - 1];
557       const int16_t z = diff[(i - 1) * stride + j];
558       x_sum += x;
559       x2_sum += x * x;
560       xy_sum += x * y;
561       xz_sum += x * z;
562     }
563   }
564 
565   for (int j = 0; j < width; ++j) {
566     x_finalrow += diff[(height - 1) * stride + j];
567     x2_finalrow +=
568         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
569   }
570   for (int i = 0; i < height; ++i) {
571     x_finalcol += diff[i * stride + width - 1];
572     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
573   }
574 
575   int64_t xhor_sum = x_sum - x_finalcol;
576   int64_t xver_sum = x_sum - x_finalrow;
577   int64_t y_sum = x_sum - x_firstcol;
578   int64_t z_sum = x_sum - x_firstrow;
579   int64_t x2hor_sum = x2_sum - x2_finalcol;
580   int64_t x2ver_sum = x2_sum - x2_finalrow;
581   int64_t y2_sum = x2_sum - x2_firstcol;
582   int64_t z2_sum = x2_sum - x2_firstrow;
583 
584   const float num_hor = (float)(height * (width - 1));
585   const float num_ver = (float)((height - 1) * width);
586 
587   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
588   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
589 
590   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
591   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
592 
593   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
594   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
595 
596   if (xhor_var_n > 0 && y_var_n > 0) {
597     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
598     *hcorr = *hcorr < 0 ? 0 : *hcorr;
599   } else {
600     *hcorr = 1.0;
601   }
602   if (xver_var_n > 0 && z_var_n > 0) {
603     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
604     *vcorr = *vcorr < 0 ? 0 : *vcorr;
605   } else {
606     *vcorr = 1.0;
607   }
608 }
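
// Illustrative note (not part of libaom): the statistic computed above is a
// plain Pearson correlation over neighboring pixel pairs,
//   corr = (E[x*y] - E[x]*E[y]) / sqrt((E[x^2] - E[x]^2) * (E[y^2] - E[y]^2)),
// evaluated once with y = left neighbor (hcorr) and once with y = top
// neighbor (vcorr), then clamped to be non-negative. A minimal sketch over
// pre-paired samples (hypothetical helper, not used by the encoder):
static float pearson_corr_sketch(const float *x, const float *y, int n) {
  float sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
  for (int i = 0; i < n; ++i) {
    sx += x[i];
    sy += y[i];
    sxx += x[i] * x[i];
    syy += y[i] * y[i];
    sxy += x[i] * y[i];
  }
  const float var_x = sxx - sx * sx / n;
  const float var_y = syy - sy * sy / n;
  const float cov_xy = sxy - sx * sy / n;
  if (var_x <= 0 || var_y <= 0) return 1.0f;  // fall back, as above
  const float corr = cov_xy / sqrtf(var_x * var_y);
  return corr < 0 ? 0 : corr;
}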
609 
610 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
611                        int64_t *sse_y) {
612   const AV1_COMMON *cm = &cpi->common;
613   const int num_planes = av1_num_planes(cm);
614   const MACROBLOCKD *xd = &x->e_mbd;
615   const MB_MODE_INFO *mbmi = xd->mi[0];
616   int64_t total_sse = 0;
617   for (int plane = 0; plane < num_planes; ++plane) {
618     if (plane && !xd->is_chroma_ref) break;
619     const struct macroblock_plane *const p = &x->plane[plane];
620     const struct macroblockd_plane *const pd = &xd->plane[plane];
621     const BLOCK_SIZE bs =
622         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
623     unsigned int sse;
624 
625     cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
626                             pd->dst.stride, &sse);
627     total_sse += sse;
628     if (!plane && sse_y) *sse_y = sse;
629   }
630   total_sse <<= 4;
631   return total_sse;
632 }
633 
634 int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
635                           intptr_t block_size, int64_t *ssz) {
636   int i;
637   int64_t error = 0, sqcoeff = 0;
638 
639   for (i = 0; i < block_size; i++) {
640     const int diff = coeff[i] - dqcoeff[i];
641     error += diff * diff;
642     sqcoeff += coeff[i] * coeff[i];
643   }
644 
645   *ssz = sqcoeff;
646   return error;
647 }
648 
649 int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
650                              intptr_t block_size) {
651   int64_t error = 0;
652 
653   for (int i = 0; i < block_size; i++) {
654     const int diff = coeff[i] - dqcoeff[i];
655     error += diff * diff;
656   }
657 
658   return error;
659 }
660 
661 #if CONFIG_AV1_HIGHBITDEPTH
662 int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
663                                  const tran_low_t *dqcoeff, intptr_t block_size,
664                                  int64_t *ssz, int bd) {
665   int i;
666   int64_t error = 0, sqcoeff = 0;
667   int shift = 2 * (bd - 8);
668   int rounding = (1 << shift) >> 1;
669 
670   for (i = 0; i < block_size; i++) {
671     const int64_t diff = coeff[i] - dqcoeff[i];
672     error += diff * diff;
673     sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
674   }
675   error = (error + rounding) >> shift;
676   sqcoeff = (sqcoeff + rounding) >> shift;
677 
678   *ssz = sqcoeff;
679   return error;
680 }
681 #endif
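
// Illustrative note (not part of libaom): the shift above rescales the
// high-bit-depth error back to the 8-bit domain. For bd = 10, shift = 4 and
// rounding = 8, so an accumulated error of 1000 becomes (1000 + 8) >> 4 = 63,
// i.e. approximately error / 2^(2 * (bd - 8)).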
682 
683 static int conditional_skipintra(PREDICTION_MODE mode,
684                                  PREDICTION_MODE best_intra_mode) {
685   if (mode == D113_PRED && best_intra_mode != V_PRED &&
686       best_intra_mode != D135_PRED)
687     return 1;
688   if (mode == D67_PRED && best_intra_mode != V_PRED &&
689       best_intra_mode != D45_PRED)
690     return 1;
691   if (mode == D203_PRED && best_intra_mode != H_PRED &&
692       best_intra_mode != D45_PRED)
693     return 1;
694   if (mode == D157_PRED && best_intra_mode != H_PRED &&
695       best_intra_mode != D135_PRED)
696     return 1;
697   return 0;
698 }
699 
700 static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
701                        int16_t mode_context) {
702   if (is_inter_compound_mode(mode)) {
703     return mode_costs
704         ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
705   }
706 
707   int mode_cost = 0;
708   int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
709 
710   assert(is_inter_mode(mode));
711 
712   if (mode == NEWMV) {
713     mode_cost = mode_costs->newmv_mode_cost[mode_ctx][0];
714     return mode_cost;
715   } else {
716     mode_cost = mode_costs->newmv_mode_cost[mode_ctx][1];
717     mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
718 
719     if (mode == GLOBALMV) {
720       mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][0];
721       return mode_cost;
722     } else {
723       mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][1];
724       mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
725       mode_cost += mode_costs->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
726       return mode_cost;
727     }
728   }
729 }
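
// Illustrative note (not part of libaom): for single-reference modes the cost
// above is accumulated along a small decision tree. For example,
//   cost(NEWMV)  = newmv_mode_cost[ctx][0];
//   cost(NEARMV) = newmv_mode_cost[ctx][1]      // not NEWMV
//                + zeromv_mode_cost[ctx'][1]    // not GLOBALMV
//                + refmv_mode_cost[ctx''][1];   // NEARMV rather than NEARESTMV
// where ctx, ctx' and ctx'' are the three contexts derived from mode_context.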
730 
731 static inline PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
732                                               int ref_idx) {
733   return ref_idx ? compound_ref1_mode(this_mode)
734                  : compound_ref0_mode(this_mode);
735 }
736 
737 static inline void estimate_ref_frame_costs(
738     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
739     int segment_id, unsigned int *ref_costs_single,
740     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
741   int seg_ref_active =
742       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
743   if (seg_ref_active) {
744     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
745     int ref_frame;
746     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
747       memset(ref_costs_comp[ref_frame], 0,
748              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
749   } else {
750     int intra_inter_ctx = av1_get_intra_inter_context(xd);
751     ref_costs_single[INTRA_FRAME] =
752         mode_costs->intra_inter_cost[intra_inter_ctx][0];
753     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
754 
755     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
756       ref_costs_single[i] = base_cost;
757 
758     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
759     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
760     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
761     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
762     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
763     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
764 
765     // Determine cost of a single ref frame, where frame types are represented
766     // by a tree:
767     // Level 0: add cost whether this ref is a forward or backward ref
768     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
769     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
770     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
773     ref_costs_single[ALTREF2_FRAME] +=
774         mode_costs->single_ref_cost[ctx_p1][0][1];
775     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
776 
777     // Level 1: if this ref is forward ref,
778     // add cost whether it is last/last2 or last3/golden
779     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
780     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
781     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
782     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
783 
784     // Level 1: if this ref is backward ref
785     // then add cost whether this ref is altref or backward ref
786     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
787     ref_costs_single[ALTREF2_FRAME] +=
788         mode_costs->single_ref_cost[ctx_p2][1][0];
789     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
790 
791     // Level 2: further add cost whether this ref is last or last2
792     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
793     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
794 
795     // Level 2: last3 or golden
796     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
797     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
798 
799     // Level 2: bwdref or altref2
800     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
801     ref_costs_single[ALTREF2_FRAME] +=
802         mode_costs->single_ref_cost[ctx_p6][5][1];
803 
804     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
805       // Similar to single ref, determine cost of compound ref frames.
806       // cost_compound_refs = cost_first_ref + cost_second_ref
807       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
808       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
809       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
810       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
811       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
812 
813       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
814       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
815 
816       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
817           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
818               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
819       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
820       ref_bicomp_costs[ALTREF_FRAME] = 0;
821 
822       // cost of first ref frame
823       ref_bicomp_costs[LAST_FRAME] +=
824           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
825       ref_bicomp_costs[LAST2_FRAME] +=
826           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827       ref_bicomp_costs[LAST3_FRAME] +=
828           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
829       ref_bicomp_costs[GOLDEN_FRAME] +=
830           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831 
832       ref_bicomp_costs[LAST_FRAME] +=
833           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
834       ref_bicomp_costs[LAST2_FRAME] +=
835           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
836 
837       ref_bicomp_costs[LAST3_FRAME] +=
838           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
839       ref_bicomp_costs[GOLDEN_FRAME] +=
840           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
841 
842       // cost of second ref frame
843       ref_bicomp_costs[BWDREF_FRAME] +=
844           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
845       ref_bicomp_costs[ALTREF2_FRAME] +=
846           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847       ref_bicomp_costs[ALTREF_FRAME] +=
848           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
849 
850       ref_bicomp_costs[BWDREF_FRAME] +=
851           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
852       ref_bicomp_costs[ALTREF2_FRAME] +=
853           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
854 
855       // cost: if one ref frame is forward ref, the other ref is backward ref
856       int ref0, ref1;
857       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
858         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
859           ref_costs_comp[ref0][ref1] =
860               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
861         }
862       }
863 
864       // cost: if both ref frames are the same side.
865       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
866       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
867       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
868       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
869           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
870           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
871           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
872       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
873           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
874           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
875           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
876           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
877       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
878           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
879           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
880           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
881           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
882       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
883           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
884           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
885     } else {
886       int ref0, ref1;
887       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
888         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
889           ref_costs_comp[ref0][ref1] = 512;
890       }
891       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
892       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
893       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
894       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
895     }
896   }
897 }
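
// Illustrative note (not part of libaom): for a single reference frame the
// total cost is base_cost plus one binary cost per tree level walked above.
// For example,
//   cost(LAST_FRAME)   = base_cost
//                      + single_ref_cost[ctx_p1][0][0]   // forward ref
//                      + single_ref_cost[ctx_p3][2][0]   // last/last2 branch
//                      + single_ref_cost[ctx_p4][3][0];  // LAST rather than LAST2
//   cost(ALTREF_FRAME) = base_cost
//                      + single_ref_cost[ctx_p1][0][1]   // backward ref
//                      + single_ref_cost[ctx_p2][1][1];  // ALTREF
// which matches the per-frame accumulation performed in the code above.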
898 
899 static inline void store_coding_context(
900 #if CONFIG_INTERNAL_STATS
901     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
902 #else
903     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
904 #endif  // CONFIG_INTERNAL_STATS
905     int skippable) {
906   MACROBLOCKD *const xd = &x->e_mbd;
907 
908   // Take a snapshot of the coding context so it can be
909   // restored if we decide to encode this way
910   ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
911   ctx->skippable = skippable;
912 #if CONFIG_INTERNAL_STATS
913   ctx->best_mode_index = mode_index;
914 #endif  // CONFIG_INTERNAL_STATS
915   ctx->mic = *xd->mi[0];
916   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
917                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
918 }
919 
920 static inline void setup_buffer_ref_mvs_inter(
921     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
922     BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
923   const AV1_COMMON *cm = &cpi->common;
924   const int num_planes = av1_num_planes(cm);
925   const YV12_BUFFER_CONFIG *scaled_ref_frame =
926       av1_get_scaled_ref_frame(cpi, ref_frame);
927   MACROBLOCKD *const xd = &x->e_mbd;
928   MB_MODE_INFO *const mbmi = xd->mi[0];
929   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
930   const struct scale_factors *const sf =
931       get_ref_scale_factors_const(cm, ref_frame);
932   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
933   assert(yv12 != NULL);
934 
935   if (scaled_ref_frame) {
936     // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
937     // support scaling.
938     av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
939                          num_planes);
940   } else {
941     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
942   }
943 
944   // Gets an initial list of candidate vectors from neighbours and orders them
945   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
946                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
947                    mbmi_ext->mode_context);
948   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
949   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
950   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
951   // Further refinement that is encode side only to test the top few candidates
952   // in full and choose the best as the center point for subsequent searches.
953   // The current implementation doesn't support scaling.
954   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
955               ref_frame, block_size);
956 
957   // Go back to unscaled reference.
958   if (scaled_ref_frame) {
959     // We had temporarily setup pred block based on scaled reference above. Go
960     // back to unscaled reference now, for subsequent use.
961     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
962   }
963 }
964 
965 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
966 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
967 
968 // TODO(jingning): this mv clamping function should be block size dependent.
969 static inline void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
970   const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
971                                      xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
972                                      xd->mb_to_top_edge - LEFT_TOP_MARGIN,
973                                      xd->mb_to_bottom_edge +
974                                          RIGHT_BOTTOM_MARGIN };
975   clamp_mv(mv, &mv_limits);
976 }
977 
978 /* If the current mode shares the same mv with other modes with higher cost,
979  * skip this mode. */
980 static int skip_repeated_mv(const AV1_COMMON *const cm,
981                             const MACROBLOCK *const x,
982                             PREDICTION_MODE this_mode,
983                             const MV_REFERENCE_FRAME ref_frames[2],
984                             InterModeSearchState *search_state) {
985   const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
986   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
987   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
988   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
989   PREDICTION_MODE compare_mode = MB_MODE_COUNT;
990   if (!is_comp_pred) {
991     if (this_mode == NEARMV) {
992       if (ref_mv_count == 0) {
993         // NEARMV has the same motion vector as NEARESTMV
994         compare_mode = NEARESTMV;
995       }
996       if (ref_mv_count == 1 &&
997           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
998         // NEARMV has the same motion vector as GLOBALMV
999         compare_mode = GLOBALMV;
1000       }
1001     }
1002     if (this_mode == GLOBALMV) {
1003       if (ref_mv_count == 0 &&
1004           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1005         // GLOBALMV has the same motion vector as NEARESTMV
1006         compare_mode = NEARESTMV;
1007       }
1008       if (ref_mv_count == 1) {
1009         // GLOBALMV has the same motion vector as NEARMV
1010         compare_mode = NEARMV;
1011       }
1012     }
1013 
1014     if (compare_mode != MB_MODE_COUNT) {
1015       // Use modelled_rd to check whether compare mode was searched
1016       if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1017           INT64_MAX) {
1018         const int16_t mode_ctx =
1019             av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1020         const int compare_cost =
1021             cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1022         const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1023 
1024         // Only skip if the mode cost is larger than compare mode cost
1025         if (this_cost > compare_cost) {
1026           search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1027               search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1028           return 1;
1029         }
1030       }
1031     }
1032   }
1033   return 0;
1034 }
1035 
1036 static inline int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1037                                      const AV1_COMMON *cm,
1038                                      const MACROBLOCK *x) {
1039   const MACROBLOCKD *const xd = &x->e_mbd;
1040   *out_mv = in_mv;
1041   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1042                      cm->features.cur_frame_force_integer_mv);
1043   clamp_mv2(&out_mv->as_mv, xd);
1044   return av1_is_fullmv_in_range(&x->mv_limits,
1045                                 get_fullmv_from_mv(&out_mv->as_mv));
1046 }
1047 
1048 // To use single newmv directly for compound modes, the mv needs to be clamped
1049 // to the valid mv range. Without this, the encoder would generate out-of-range
1050 // mvs, as seen in 8k encoding.
1051 static inline void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1052                                      int ref_idx) {
1053   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1054   SubpelMvLimits mv_limits;
1055 
1056   av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1057   clamp_mv(&mv->as_mv, &mv_limits);
1058 }
1059 
1060 static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
1061                             const BLOCK_SIZE bsize, int_mv *cur_mv,
1062                             int *const rate_mv, HandleInterModeArgs *const args,
1063                             inter_mode_info *mode_info) {
1064   MACROBLOCKD *const xd = &x->e_mbd;
1065   MB_MODE_INFO *const mbmi = xd->mi[0];
1066   const int is_comp_pred = has_second_ref(mbmi);
1067   const PREDICTION_MODE this_mode = mbmi->mode;
1068   const int refs[2] = { mbmi->ref_frame[0],
1069                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1070   const int ref_mv_idx = mbmi->ref_mv_idx;
1071 
1072   if (is_comp_pred) {
1073     const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
1074     const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
1075     if (this_mode == NEW_NEWMV) {
1076       if (valid_mv0) {
1077         cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1078         clamp_mv_in_range(x, &cur_mv[0], 0);
1079       }
1080       if (valid_mv1) {
1081         cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1082         clamp_mv_in_range(x, &cur_mv[1], 1);
1083       }
1084       *rate_mv = 0;
1085       for (int i = 0; i < 2; ++i) {
1086         const int_mv ref_mv = av1_get_ref_mv(x, i);
1087         *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
1088                                     x->mv_costs->nmv_joint_cost,
1089                                     x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1090       }
1091     } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
1092       if (valid_mv1) {
1093         cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
1094         clamp_mv_in_range(x, &cur_mv[1], 1);
1095       }
1096       const int_mv ref_mv = av1_get_ref_mv(x, 1);
1097       *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
1098                                  x->mv_costs->nmv_joint_cost,
1099                                  x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1100     } else {
1101       assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
1102       if (valid_mv0) {
1103         cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
1104         clamp_mv_in_range(x, &cur_mv[0], 0);
1105       }
1106       const int_mv ref_mv = av1_get_ref_mv(x, 0);
1107       *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
1108                                  x->mv_costs->nmv_joint_cost,
1109                                  x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1110     }
1111   } else {
1112     // Single ref case.
1113     const int ref_idx = 0;
1114     int search_range = INT_MAX;
1115 
1116     if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
1117       const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
1118       int min_mv_diff = INT_MAX;
1119       int best_match = -1;
1120       MV prev_ref_mv[2] = { { 0 } };
1121       for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
1122         prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
1123                                                      idx, &x->mbmi_ext)
1124                                .as_mv;
1125         const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
1126                                        abs(ref_mv.col - prev_ref_mv[idx].col));
1127 
1128         if (min_mv_diff > ref_mv_diff) {
1129           min_mv_diff = ref_mv_diff;
1130           best_match = idx;
1131         }
1132       }
1133 
1134       if (min_mv_diff < (16 << 3)) {
1135         if (args->single_newmv_valid[best_match][refs[0]]) {
1136           search_range = min_mv_diff;
1137           search_range +=
1138               AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
1139                          prev_ref_mv[best_match].row),
1140                      abs(args->single_newmv[best_match][refs[0]].as_mv.col -
1141                          prev_ref_mv[best_match].col));
1142           // Get full pixel search range.
1143           search_range = (search_range + 4) >> 3;
1144         }
1145       }
1146     }
1147 
1148     int_mv best_mv;
1149     av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
1150                              mode_info, &best_mv, args);
1151     if (best_mv.as_int == INVALID_MV) return INT64_MAX;
1152 
1153     args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
1154     args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
1155     args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
1156     cur_mv[0].as_int = best_mv.as_int;
1157 
1158     // Return after single_newmv is set.
1159     if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
1160   }
1161 
1162   return 0;
1163 }
1164 
1165 static inline void update_mode_start_end_index(
1166     const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1167     int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1168     int interintra_allowed, int eval_motion_mode) {
1169   *mode_index_start = (int)SIMPLE_TRANSLATION;
1170   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1171   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1172     if (!eval_motion_mode) {
1173       *mode_index_end = (int)SIMPLE_TRANSLATION;
1174     } else {
1175       // Set the start index appropriately to process motion modes other than
1176       // simple translation
1177       *mode_index_start = 1;
1178     }
1179   }
1180   if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1181     *mode_index_end = SIMPLE_TRANSLATION;
1182 }
1183 
1184 /*!\brief AV1 motion mode search
1185  *
1186  * \ingroup inter_mode_search
1187  * Function to search over and determine the motion mode. It will update
1188  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1189  * WARPED_CAUSAL and determine any necessary side information for the selected
1190  * motion mode. It will also perform the full transform search, unless the
1191  * input parameter do_tx_search indicates to do an estimation of the RD rather
1192  * than an RD corresponding to a full transform search. It will return the
1193  * RD for the final motion_mode.
1194  * Do the RD search for a given inter mode and compute all information relevant
1195  * to the input mode. It will compute the best MV,
1196  * compound parameters (if the mode is a compound mode) and interpolation filter
1197  * parameters.
1198  *
1199  * \param[in]     cpi               Top-level encoder structure.
1200  * \param[in]     tile_data         Pointer to struct holding adaptive
1201  *                                  data/contexts/models for the tile during
1202  *                                  encoding.
1203  * \param[in]     x                 Pointer to struct holding all the data for
1204  *                                  the current macroblock.
1205  * \param[in]     bsize             Current block size.
1206  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1207  *                                  information.
1208  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1209  *                                  for only the Y plane.
1210  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1211  *                                  for only the UV planes.
1212  * \param[in]     args              HandleInterModeArgs struct holding
1213  *                                  miscellaneous arguments for inter mode
1214  *                                  search. See the documentation for this
1215  *                                  struct for a description of each member.
1216  * \param[in]     ref_best_rd       Best RD found so far for this block.
1217  *                                  It is used for early termination of this
1218  *                                  search if the RD exceeds this value.
1219  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1220  *                                  best total RD for a skip mode so far, and
1221  *                                  skip_rd[1] is the best RD for a skip mode so
1222  *                                  far in luma. This is used as a speed feature
1223  *                                  to skip the transform search if the computed
1224  *                                  skip RD for the current mode is not better
1225  *                                  than the best skip_rd so far.
1226  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1227  *                                  This will be modified if a motion search is
1228  *                                  done in the motion mode search.
1229  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1230  *                                  prediction. This will eventually hold the
1231  *                                  final prediction, and the tmp_dst info will
1232  *                                  be copied here.
1233  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1234  *                                  do_tx_search (see below) is 0.
1235  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1236  *                                  a full transform search. This will compute
1237  *                                  an estimated RD for the modes without the
1238  *                                  transform search and later perform the full
1239  *                                  transform search on the best candidates.
1240  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1241  *                                  information to perform a full transform
1242  *                                  search only on winning candidates searched
1243  *                                  with an estimate for transform coding RD.
1244  * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
1245  *                                  motion modes other than SIMPLE_TRANSLATION.
1246  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1247  *                                  the luma plane.
1248  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1249  * current motion mode being tested should be skipped. It returns 0 if the
1250  * motion mode search is a success.
1251  */
1252 static int64_t motion_mode_rd(
1253     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1254     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1255     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1256     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1257     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1258     int eval_motion_mode, int64_t *yrd) {
1259   const AV1_COMMON *const cm = &cpi->common;
1260   const FeatureFlags *const features = &cm->features;
1261   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1262   const int num_planes = av1_num_planes(cm);
1263   MACROBLOCKD *xd = &x->e_mbd;
1264   MB_MODE_INFO *mbmi = xd->mi[0];
1265   const int is_comp_pred = has_second_ref(mbmi);
1266   const PREDICTION_MODE this_mode = mbmi->mode;
1267   const int rate2_nocoeff = rd_stats->rate;
1268   int best_xskip_txfm = 0;
1269   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1270   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1271   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1272   const int rate_mv0 = *rate_mv;
1273   const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1274                                  is_interintra_allowed(mbmi) &&
1275                                  mbmi->compound_idx;
1276   WARP_SAMPLE_INFO *const warp_sample_info =
1277       &x->warp_sample_info[mbmi->ref_frame[0]];
1278   int *pts0 = warp_sample_info->pts;
1279   int *pts_inref0 = warp_sample_info->pts_inref;
1280 
1281   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1282   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1283   av1_invalid_rd_stats(&best_rd_stats);
1284   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1285   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1286   *yrd = INT64_MAX;
1287   if (features->switchable_motion_mode) {
1288     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1289     // is allowed.
1290     last_motion_mode_allowed = motion_mode_allowed(
1291         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1292   }
1293 
1294   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1295     // Collect projection samples used in least squares approximation of
1296     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1297     if (warp_sample_info->num < 0) {
1298       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1299     }
1300     mbmi->num_proj_ref = warp_sample_info->num;
1301   }
1302   const int total_samples = mbmi->num_proj_ref;
1303   if (total_samples == 0) {
1304     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1305     // warped parameters.
1306     last_motion_mode_allowed = OBMC_CAUSAL;
1307   }
1308 
1309   const MB_MODE_INFO base_mbmi = *mbmi;
1310   MB_MODE_INFO best_mbmi;
1311   const int interp_filter = features->interp_filter;
1312   const int switchable_rate =
1313       av1_is_interp_needed(xd)
1314           ? av1_get_switchable_rate(x, xd, interp_filter,
1315                                     cm->seq_params->enable_dual_filter)
1316           : 0;
1317   int64_t best_rd = INT64_MAX;
1318   int best_rate_mv = rate_mv0;
1319   const int mi_row = xd->mi_row;
1320   const int mi_col = xd->mi_col;
1321   int mode_index_start, mode_index_end;
1322   const int txfm_rd_gate_level =
1323       get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
1324                              cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
1325                              TX_SEARCH_MOTION_MODE, eval_motion_mode);
1326 
1327   // Modify the start and end index according to speed features. For example,
1328   // if SIMPLE_TRANSLATION has already been searched according to
1329   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1330   // to avoid searching it again.
1331   update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1332                               last_motion_mode_allowed, interintra_allowed,
1333                               eval_motion_mode);
1334   // Main function loop. This loops over all of the possible motion modes and
1335   // computes RD to determine the best one. This process includes computing
1336   // any necessary side information for the motion mode and performing the
1337   // transform search.
1338   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1339        mode_index++) {
1340     if (args->skip_motion_mode && mode_index) continue;
1341     int tmp_rate2 = rate2_nocoeff;
1342     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1343     int tmp_rate_mv = rate_mv0;
1344 
1345     *mbmi = base_mbmi;
1346     if (is_interintra_mode) {
1347       // Only use SIMPLE_TRANSLATION for interintra
1348       mbmi->motion_mode = SIMPLE_TRANSLATION;
1349     } else {
1350       mbmi->motion_mode = (MOTION_MODE)mode_index;
1351       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1352     }
1353 
1354     // Do not search OBMC if the probability of selecting it is below a
1355     // predetermined threshold for this update_type and block size.
1356     const FRAME_UPDATE_TYPE update_type =
1357         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1358     int use_actual_frame_probs = 1;
1359     int prune_obmc;
1360 #if CONFIG_FPMT_TEST
1361     use_actual_frame_probs =
1362         (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1363     if (!use_actual_frame_probs) {
1364       prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1365                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1366     }
1367 #endif
1368     if (use_actual_frame_probs) {
1369       prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1370                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1371     }
1372     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1373         mbmi->motion_mode == OBMC_CAUSAL)
1374       continue;
1375 
1376     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1377       // SIMPLE_TRANSLATION mode: no need to recalculate.
1378       // The prediction is calculated before motion_mode_rd() is called in
1379       // handle_inter_mode()
1380     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1381       const uint32_t cur_mv = mbmi->mv[0].as_int;
1382       // OBMC_CAUSAL not allowed for compound prediction
1383       assert(!is_comp_pred);
1384       if (have_newmv_in_inter_mode(this_mode)) {
1385         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1386                                  &mbmi->mv[0], NULL);
1387         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1388       }
1389       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1390         // Build the predictor according to the current motion vector if it has
1391         // not already been built
1392         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1393                                       0, av1_num_planes(cm) - 1);
1394       }
1395       // Build the inter predictor by blending the predictor corresponding to
1396       // this MV, and the neighboring blocks using the OBMC model
1397       av1_build_obmc_inter_prediction(
1398           cm, xd, args->above_pred_buf, args->above_pred_stride,
1399           args->left_pred_buf, args->left_pred_stride);
1400 #if !CONFIG_REALTIME_ONLY
1401     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1402       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1403       mbmi->motion_mode = WARPED_CAUSAL;
1404       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1405       mbmi->interp_filters =
1406           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1407 
1408       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1409       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1410       // Select the samples according to motion vector difference
1411       if (mbmi->num_proj_ref > 1) {
1412         mbmi->num_proj_ref = av1_selectSamples(
1413             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1414       }
1415 
1416       // Compute the warped motion parameters with a least squares fit
1417       //  using the collected samples
1418       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1419                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1420                                &mbmi->wm_params, mi_row, mi_col)) {
1421         assert(!is_comp_pred);
1422         if (have_newmv_in_inter_mode(this_mode)) {
1423           // Refine MV for NEWMV mode
1424           const int_mv mv0 = mbmi->mv[0];
1425           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1426           const int num_proj_ref0 = mbmi->num_proj_ref;
1427 
1428           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1429           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1430           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1431                                             &ref_mv.as_mv, NULL);
1432 
1433           // Refine MV in a small range.
1434           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1435                                total_samples, cpi->sf.mv_sf.warp_search_method,
1436                                cpi->sf.mv_sf.warp_search_iters);
1437 
1438           if (mv0.as_int != mbmi->mv[0].as_int) {
1439             // Keep the refined MV and WM parameters.
1440             tmp_rate_mv = av1_mv_bit_cost(
1441                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1442                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1443             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1444           } else {
1445             // Restore the old MV and WM parameters.
1446             mbmi->mv[0] = mv0;
1447             mbmi->wm_params = wm_params0;
1448             mbmi->num_proj_ref = num_proj_ref0;
1449           }
1450         }
1451 
1452         // Build the warped predictor
1453         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1454                                       av1_num_planes(cm) - 1);
1455       } else {
1456         continue;
1457       }
1458 #endif  // !CONFIG_REALTIME_ONLY
1459     } else if (is_interintra_mode) {
1460       const int ret =
1461           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1462                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1463       if (ret < 0) continue;
1464     }
1465 
1466     // If we are searching newmv and the mv is the same as refmv, skip the
1467     // current mode
1468     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1469 
1470     // Update rd_stats for the current motion mode
1471     txfm_info->skip_txfm = 0;
1472     rd_stats->dist = 0;
1473     rd_stats->sse = 0;
1474     rd_stats->skip_txfm = 1;
1475     rd_stats->rate = tmp_rate2;
1476     const ModeCosts *mode_costs = &x->mode_costs;
1477     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1478     if (interintra_allowed) {
1479       rd_stats->rate +=
1480           mode_costs->interintra_cost[size_group_lookup[bsize]]
1481                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1482     }
1483     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1484         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1485       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1486         rd_stats->rate +=
1487             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1488       } else {
1489         rd_stats->rate +=
1490             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1491       }
1492     }
1493 
1494     int64_t this_yrd = INT64_MAX;
1495 
1496     if (!do_tx_search) {
1497       // Avoid doing a transform search here to speed up the overall mode
1498       // search. It will be done later in the mode search if the current
1499       // motion mode seems promising.
1500       int64_t curr_sse = -1;
1501       int64_t sse_y = -1;
1502       int est_residue_cost = 0;
1503       int64_t est_dist = 0;
1504       int64_t est_rd = 0;
1505       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1506         curr_sse = get_sse(cpi, x, &sse_y);
1507         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1508                                                  &est_residue_cost, &est_dist);
1509         (void)has_est_rd;
1510         assert(has_est_rd);
1511       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1512                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1513         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1514             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1515             NULL, &curr_sse, NULL, NULL, NULL);
1516         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1517       }
1518       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1519       if (est_rd * 0.80 > *best_est_rd) {
1520         mbmi->ref_frame[1] = ref_frame_1;
1521         continue;
1522       }
1523       const int mode_rate = rd_stats->rate;
1524       rd_stats->rate += est_residue_cost;
1525       rd_stats->dist = est_dist;
1526       rd_stats->rdcost = est_rd;
1527       if (rd_stats->rdcost < *best_est_rd) {
1528         *best_est_rd = rd_stats->rdcost;
1529         assert(sse_y >= 0);
1530         ref_skip_rd[1] = txfm_rd_gate_level
1531                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1532                              : INT64_MAX;
1533       }
1534       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1535         if (!is_comp_pred) {
1536           assert(curr_sse >= 0);
1537           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1538                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1539                                 rd_stats_uv, mbmi);
1540         }
1541       } else {
1542         assert(curr_sse >= 0);
1543         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1544                               rd_stats->rdcost, rd_stats, rd_stats_y,
1545                               rd_stats_uv, mbmi);
1546       }
1547       mbmi->skip_txfm = 0;
1548     } else {
1549       // Perform full transform search
1550       int64_t skip_rd = INT64_MAX;
1551       int64_t skip_rdy = INT64_MAX;
1552       if (txfm_rd_gate_level) {
1553         // Check if the mode is good enough based on skip RD
1554         int64_t sse_y = INT64_MAX;
1555         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1556         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1557         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1558         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1559                                         txfm_rd_gate_level, 0);
1560         if (!eval_txfm) continue;
1561       }
1562 
1563       // Do transform search
1564       const int mode_rate = rd_stats->rate;
1565       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1566                            rd_stats->rate, ref_best_rd)) {
1567         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1568           return INT64_MAX;
1569         }
1570         continue;
1571       }
1572       const int skip_ctx = av1_get_skip_txfm_context(xd);
1573       const int y_rate =
1574           rd_stats->skip_txfm
1575               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1576               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1577       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1578 
1579       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1580       if (curr_rd < ref_best_rd) {
1581         ref_best_rd = curr_rd;
1582         ref_skip_rd[0] = skip_rd;
1583         ref_skip_rd[1] = skip_rdy;
1584       }
1585       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1586         inter_mode_data_push(
1587             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1588             rd_stats_y->rate + rd_stats_uv->rate +
1589                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1590       }
1591     }
1592 
1593     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1594       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1595         mbmi->interp_filters =
1596             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1597       }
1598     }
1599 
1600     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1601     if (mode_index == 0) {
1602       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1603     }
1604     if (mode_index == 0 || tmp_rd < best_rd) {
1605       // Update best_rd data if this is the best motion mode so far
1606       best_mbmi = *mbmi;
1607       best_rd = tmp_rd;
1608       best_rd_stats = *rd_stats;
1609       best_rd_stats_y = *rd_stats_y;
1610       best_rate_mv = tmp_rate_mv;
1611       *yrd = this_yrd;
1612       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1613       memcpy(best_blk_skip, txfm_info->blk_skip,
1614              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1615       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1616       best_xskip_txfm = mbmi->skip_txfm;
1617     }
1618   }
1619   // Update RD and mbmi stats for selected motion mode
1620   mbmi->ref_frame[1] = ref_frame_1;
1621   *rate_mv = best_rate_mv;
1622   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1623     av1_invalid_rd_stats(rd_stats);
1624     restore_dst_buf(xd, *orig_dst, num_planes);
1625     return INT64_MAX;
1626   }
1627   *mbmi = best_mbmi;
1628   *rd_stats = best_rd_stats;
1629   *rd_stats_y = best_rd_stats_y;
1630   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1631   memcpy(txfm_info->blk_skip, best_blk_skip,
1632          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1633   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1634   txfm_info->skip_txfm = best_xskip_txfm;
1635 
1636   restore_dst_buf(xd, *orig_dst, num_planes);
1637   return 0;
1638 }
1639 
1640 static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
1641                             MACROBLOCK *const x, BLOCK_SIZE bsize,
1642                             const BUFFER_SET *const orig_dst, int64_t best_rd) {
1643   assert(bsize < BLOCK_SIZES_ALL);
1644   const AV1_COMMON *cm = &cpi->common;
1645   const int num_planes = av1_num_planes(cm);
1646   MACROBLOCKD *const xd = &x->e_mbd;
1647   const int mi_row = xd->mi_row;
1648   const int mi_col = xd->mi_col;
1649   int64_t total_sse = 0;
1650   int64_t this_rd = INT64_MAX;
1651   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
1652   rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
1653 
1654   for (int plane = 0; plane < num_planes; ++plane) {
1655     // Call av1_enc_build_inter_predictor() for one plane at a time.
1656     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1657                                   plane, plane);
1658     const struct macroblockd_plane *const pd = &xd->plane[plane];
1659     const BLOCK_SIZE plane_bsize =
1660         get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1661 
1662     av1_subtract_plane(x, plane_bsize, plane);
1663 
1664     int64_t sse =
1665         av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
1666     if (is_cur_buf_hbd(xd)) sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1667     sse <<= 4;
1668     total_sse += sse;
1669     // When current rd cost is more than the best rd, skip evaluation of
1670     // remaining planes.
1671     this_rd = RDCOST(x->rdmult, rd_stats->rate, total_sse);
1672     if (this_rd > best_rd) break;
1673   }
1674 
1675   rd_stats->dist = rd_stats->sse = total_sse;
1676   rd_stats->rdcost = this_rd;
1677 
1678   restore_dst_buf(xd, *orig_dst, num_planes);
1679   return 0;
1680 }
1681 
1682 // Check NEARESTMV, NEARMV and GLOBALMV ref mvs for duplicates and skip the
1683 // relevant mode
1684 // Note(rachelbarker): This speed feature currently does not interact correctly
1685 // with global motion. The issue is that, when global motion is used, GLOBALMV
1686 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1687 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
1688 static inline int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1689                                       int ref_idx,
1690                                       const MV_REFERENCE_FRAME *ref_frame,
1691                                       PREDICTION_MODE single_mode) {
1692   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1693   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1694   assert(single_mode != NEWMV);
1695   if (single_mode == NEARESTMV) {
1696     return 0;
1697   } else if (single_mode == NEARMV) {
1698     // when ref_mv_count = 0, NEARESTMV and NEARMV are the same as GLOBALMV
1699     // when ref_mv_count = 1, NEARMV is the same as GLOBALMV
1700     if (ref_mv_count < 2) return 1;
1701   } else if (single_mode == GLOBALMV) {
1702     // when ref_mv_count == 0, GLOBALMV is the same as NEARESTMV
1703     if (ref_mv_count == 0) return 1;
1704     // when ref_mv_count == 1, NEARMV is the same as GLOBALMV
1705     else if (ref_mv_count == 1)
1706       return 0;
1707 
1708     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1709     // Check GLOBALMV is matching with any mv in ref_mv_stack
1710     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1711       int_mv this_mv;
1712 
1713       if (ref_idx == 0)
1714         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1715       else
1716         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1717 
1718       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1719         return 1;
1720     }
1721   }
1722   return 0;
1723 }
1724 
1725 static inline int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1726                               int ref_idx, int ref_mv_idx,
1727                               int skip_repeated_ref_mv,
1728                               const MV_REFERENCE_FRAME *ref_frame,
1729                               const MB_MODE_INFO_EXT *mbmi_ext) {
1730   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1731   assert(is_inter_singleref_mode(single_mode));
1732   if (single_mode == NEWMV) {
1733     this_mv->as_int = INVALID_MV;
1734   } else if (single_mode == GLOBALMV) {
1735     if (skip_repeated_ref_mv &&
1736         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1737       return 0;
1738     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1739   } else {
1740     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1741     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1742     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1743     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1744       assert(ref_mv_offset >= 0);
1745       if (ref_idx == 0) {
1746         *this_mv =
1747             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1748       } else {
1749         *this_mv =
1750             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1751       }
1752     } else {
1753       if (skip_repeated_ref_mv &&
1754           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1755         return 0;
1756       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1757     }
1758   }
1759   return 1;
1760 }
1761 
1762 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1763 // population
1764 static inline int skip_nearest_near_mv_using_refmv_weight(
1765     const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1766     const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
1767   if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1768   // Do not skip the mode if the current block has not yet obtained a valid
1769   // inter mode.
1770   if (!is_inter_mode(best_mode)) return 0;
1771 
1772   const MACROBLOCKD *xd = &x->e_mbd;
1773   // Do not skip the mode if either the top or the left neighboring block is
1774   // not available.
1775   if (!xd->left_available || !xd->up_available) return 0;
1776   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1777   const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1778   const int ref_mv_count =
1779       AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1780 
1781   if (ref_mv_count == 0) return 0;
1782   // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1783   if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1784 
1785   // Count number of ref mvs populated from nearest candidates
1786   int nearest_refmv_count = 0;
1787   for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1788     if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1789   }
1790 
1791   // nearest_refmv_count indicates how closely the block's motion matches that
1792   // of its spatial neighbors. A small nearest_refmv_count relative to
1793   // ref_mv_count means less correlation with the spatial neighbors, and hence
1794   // less chance of NEARESTMV or NEARMV becoming the best mode, since these
1795   // modes work well for blocks that share similar motion characteristics with
1796   // their neighbors. Thus, NEARMV mode is pruned when nearest_refmv_count is
1797   // relatively small compared to ref_mv_count, and NEARESTMV mode is pruned if
1798   // none of the ref mvs are populated from nearest candidates.
1799   const int prune_thresh = 1 + (ref_mv_count >= 2);
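  // Illustrative values (not from the original source): prune_thresh is 1 when
  // ref_mv_count == 1 and 2 when ref_mv_count >= 2. For example, with
  // ref_mv_count = 3 and only one nearest candidate, nearest_refmv_count = 1 is
  // below prune_thresh = 2, so the mode is pruned.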
1800   if (nearest_refmv_count < prune_thresh) return 1;
1801   return 0;
1802 }
1803 
1804 // This function updates the non-new mvs for the current prediction mode
1805 static inline int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1806                                const AV1_COMMON *cm, const MACROBLOCK *x,
1807                                int skip_repeated_ref_mv) {
1808   const MACROBLOCKD *xd = &x->e_mbd;
1809   const MB_MODE_INFO *mbmi = xd->mi[0];
1810   const int is_comp_pred = has_second_ref(mbmi);
1811 
1812   int ret = 1;
1813   for (int i = 0; i < is_comp_pred + 1; ++i) {
1814     int_mv this_mv;
1815     this_mv.as_int = INVALID_MV;
1816     ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1817                       skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1818     if (!ret) return 0;
1819     const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1820     if (single_mode == NEWMV) {
1821       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1822       cur_mv[i] =
1823           (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1824                          .this_mv
1825                    : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1826                          .comp_mv;
1827     } else {
1828       ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1829     }
1830   }
1831   return ret;
1832 }
1833 
1834 static inline int get_drl_cost(const MB_MODE_INFO *mbmi,
1835                                const MB_MODE_INFO_EXT *mbmi_ext,
1836                                const int (*const drl_mode_cost0)[2],
1837                                int8_t ref_frame_type) {
1838   int cost = 0;
1839   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1840     for (int idx = 0; idx < 2; ++idx) {
1841       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1842         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1843         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1844         if (mbmi->ref_mv_idx == idx) return cost;
1845       }
1846     }
1847     return cost;
1848   }
1849 
1850   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1851     for (int idx = 1; idx < 3; ++idx) {
1852       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1853         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1854         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1855         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1856       }
1857     }
1858     return cost;
1859   }
1860   return cost;
1861 }
1862 
1863 static inline int is_single_newmv_valid(const HandleInterModeArgs *const args,
1864                                         const MB_MODE_INFO *const mbmi,
1865                                         PREDICTION_MODE this_mode) {
1866   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1867     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1868     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1869     if (single_mode == NEWMV &&
1870         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1871       return 0;
1872     }
1873   }
1874   return 1;
1875 }
1876 
1877 static int get_drl_refmv_count(const MACROBLOCK *const x,
1878                                const MV_REFERENCE_FRAME *ref_frame,
1879                                PREDICTION_MODE mode) {
1880   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1881   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1882   const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
1883   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1884   const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
1885   const int has_drl =
1886       (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
1887   const int ref_set =
1888       has_drl ? AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv) : 1;
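  // Illustrative examples (not from the original source), assuming
  // MAX_REF_MV_SEARCH is 3: NEARMV with ref_mv_count = 4 gives has_drl = 1 and
  // ref_set = AOMMIN(3, 4 - 1) = 3; NEWMV with ref_mv_count = 1 gives
  // has_drl = 0 and ref_set = 1.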
1889 
1890   return ref_set;
1891 }
1892 
1893 // Checks if a particular ref_mv_idx should be pruned.
1894 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1895                                          const int qindex,
1896                                          const int ref_mv_idx) {
1897   if (reduce_inter_modes >= 3) return 1;
1898   // Q-index logic based pruning is enabled only for
1899   // reduce_inter_modes = 2.
1900   assert(reduce_inter_modes == 2);
1901   // When reduce_inter_modes=2, pruning happens as below based on q index.
1902   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1903   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1904   // For q index range between 171 and 255: no pruning.
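  // Sanity check of the formula below (illustrative, assuming QINDEX_RANGE is
  // 256): qindex = 60 gives (60 * 3) / 256 + 1 = 1, so ref_mv_idx >= 1 is
  // pruned; qindex = 200 gives (200 * 3) / 256 + 1 = 3, so no ref_mv_idx in
  // 0..2 is pruned, matching the ranges above.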
1905   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1906   return (ref_mv_idx >= min_prune_ref_mv_idx);
1907 }
1908 
1909 // Whether this reference motion vector can be skipped, based on initial
1910 // heuristics.
1911 static bool ref_mv_idx_early_breakout(
1912     const SPEED_FEATURES *const sf,
1913     const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1914     const HandleInterModeArgs *const args, int64_t ref_best_rd,
1915     int ref_mv_idx) {
1916   MACROBLOCKD *xd = &x->e_mbd;
1917   MB_MODE_INFO *mbmi = xd->mi[0];
1918   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1919   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1920   const int is_comp_pred = has_second_ref(mbmi);
1921   if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1922     if (mbmi->ref_frame[0] == LAST2_FRAME ||
1923         mbmi->ref_frame[0] == LAST3_FRAME ||
1924         mbmi->ref_frame[1] == LAST2_FRAME ||
1925         mbmi->ref_frame[1] == LAST3_FRAME) {
1926       const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1927       if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1928           REF_CAT_LEVEL) {
1929         return true;
1930       }
1931     }
1932     // TODO(any): Experiment with reduce_inter_modes for compound prediction
1933     if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1934         have_newmv_in_inter_mode(mbmi->mode)) {
1935       if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1936           mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1937         const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1938         const int do_prune = prune_ref_mv_idx_using_qindex(
1939             sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1940         if (do_prune &&
1941             (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1942              REF_CAT_LEVEL)) {
1943           return true;
1944         }
1945       }
1946     }
1947   }
1948 
1949   mbmi->ref_mv_idx = ref_mv_idx;
1950   if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1951     return true;
1952   }
1953   size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1954   const int drl_cost = get_drl_cost(
1955       mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1956   est_rd_rate += drl_cost;
1957   if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1958       mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1959     return true;
1960   }
1961   return false;
1962 }
1963 
1964 // Compute the estimated RD cost for the motion vector with simple translation.
1965 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1966                                           RD_STATS *rd_stats,
1967                                           HandleInterModeArgs *args,
1968                                           int ref_mv_idx, int64_t ref_best_rd,
1969                                           BLOCK_SIZE bsize) {
1970   MACROBLOCKD *xd = &x->e_mbd;
1971   MB_MODE_INFO *mbmi = xd->mi[0];
1972   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1973   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1974   const AV1_COMMON *cm = &cpi->common;
1975   const int is_comp_pred = has_second_ref(mbmi);
1976   const ModeCosts *mode_costs = &x->mode_costs;
1977 
1978   struct macroblockd_plane *p = xd->plane;
1979   const BUFFER_SET orig_dst = {
1980     { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1981     { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1982   };
1983   av1_init_rd_stats(rd_stats);
1984 
1985   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1986   mbmi->comp_group_idx = 0;
1987   mbmi->compound_idx = 1;
1988   if (mbmi->ref_frame[1] == INTRA_FRAME) {
1989     mbmi->ref_frame[1] = NONE_FRAME;
1990   }
1991   int16_t mode_ctx =
1992       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1993 
1994   mbmi->num_proj_ref = 0;
1995   mbmi->motion_mode = SIMPLE_TRANSLATION;
1996   mbmi->ref_mv_idx = ref_mv_idx;
1997 
1998   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1999   const int drl_cost =
2000       get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2001   rd_stats->rate += drl_cost;
2002 
2003   int_mv cur_mv[2];
2004   if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
2005     return INT64_MAX;
2006   }
2007   assert(have_nearmv_in_inter_mode(mbmi->mode));
2008   for (int i = 0; i < is_comp_pred + 1; ++i) {
2009     mbmi->mv[i].as_int = cur_mv[i].as_int;
2010   }
2011   const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
2012   rd_stats->rate += ref_mv_cost;
2013 
2014   if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
2015     return INT64_MAX;
2016   }
2017 
2018   mbmi->motion_mode = SIMPLE_TRANSLATION;
2019   mbmi->num_proj_ref = 0;
2020   if (is_comp_pred) {
2021     // Only compound_average
2022     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2023     mbmi->comp_group_idx = 0;
2024     mbmi->compound_idx = 1;
2025   }
2026   set_default_interp_filters(mbmi, cm->features.interp_filter);
2027 
2028   const int mi_row = xd->mi_row;
2029   const int mi_col = xd->mi_col;
2030   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
2031                                 AOM_PLANE_Y, AOM_PLANE_Y);
2032   int est_rate;
2033   int64_t est_dist;
2034   model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2035                                   NULL, NULL, NULL, NULL, NULL);
2036   return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2037 }
2038 
2039 // Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
2040 // an integer. A 0 in the i-th bit means that integer is excluded, a 1 means
2041 // it is included.
2042 static inline void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2043 
2044 static inline bool mask_check_bit(int mask, int index) {
2045   return (mask >> index) & 0x1;
2046 }
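// Illustrative usage of the two helpers above (not from the original source):
// after `int mask = 0; mask_set_bit(&mask, 2);` the set contains only the
// integer 2, so mask_check_bit(mask, 2) is true and mask_check_bit(mask, 0)
// is false.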
2047 
2048 // Before performing the full MV search in handle_inter_mode, do a simple
2049 // translation search and see if we can eliminate any motion vectors.
2050 // Returns an integer where, if the i-th bit is set, it means that the i-th
2051 // motion vector should be searched. This is only set for NEAR_MV.
2052 static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2053                                 RD_STATS *rd_stats,
2054                                 HandleInterModeArgs *const args,
2055                                 int64_t ref_best_rd, BLOCK_SIZE bsize,
2056                                 const int ref_set) {
2057   // If the ref mv count is equal to 1, do not prune it. It is better to
2058   // evaluate the single candidate than to prune it.
2059   if (ref_set == 1) return 1;
2060   AV1_COMMON *const cm = &cpi->common;
2061   const MACROBLOCKD *const xd = &x->e_mbd;
2062   const MB_MODE_INFO *const mbmi = xd->mi[0];
2063   const PREDICTION_MODE this_mode = mbmi->mode;
2064 
2065   // Only search indices if they have some chance of being good.
2066   int good_indices = 0;
2067   for (int i = 0; i < ref_set; ++i) {
2068     if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2069                                   ref_best_rd, i)) {
2070       continue;
2071     }
2072     mask_set_bit(&good_indices, i);
2073   }
2074 
2075   // Only prune in NEARMV mode, if the speed feature is set, and the block size
2076   // is large enough. If these conditions are not met, return all good indices
2077   // found so far.
2078   if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2079     return good_indices;
2080   if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2081   if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2082   // Do not prune when there is internal resizing. TODO(elliottk) fix this
2083   // so b/2384 can be resolved.
2084   if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2085       (mbmi->ref_frame[1] > 0 &&
2086        av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2087     return good_indices;
2088   }
2089 
2090   // Calculate the RD cost for the motion vectors using simple translation.
2091   int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2092   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2093     // If this index is bad, ignore it.
2094     if (!mask_check_bit(good_indices, ref_mv_idx)) {
2095       continue;
2096     }
2097     idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2098         cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2099   }
2100   // Find the index with the best RD cost.
2101   int best_idx = 0;
2102   for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2103     if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2104       best_idx = i;
2105     }
2106   }
2107   // Only include indices that are good and within a small fraction of the best.
2108   const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2109   // Also skip an index if its simple translation cost exceeds this multiple of
2110   // the best RD found so far. Both cutoffs are derived experimentally.
2111   const double ref_dth = 5;
2112   int result = 0;
2113   for (int i = 0; i < ref_set; ++i) {
2114     if (mask_check_bit(good_indices, i) &&
2115         (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2116         (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2117       mask_set_bit(&result, i);
2118     }
2119   }
2120   return result;
2121 }
2122 
2123 /*!\brief Motion mode information for inter mode search speedup.
2124  *
2125  * Used in a speed feature to search motion modes other than
2126  * SIMPLE_TRANSLATION only on winning candidates.
2127  */
2128 typedef struct motion_mode_candidate {
2129   /*!
2130    * Mode info for the motion mode candidate.
2131    */
2132   MB_MODE_INFO mbmi;
2133   /*!
2134    * Rate describing the cost of the motion vectors for this candidate.
2135    */
2136   int rate_mv;
2137   /*!
2138    * Rate before motion mode search and transform coding is applied.
2139    */
2140   int rate2_nocoeff;
2141   /*!
2142    * An integer value 0 or 1 which indicates whether or not to skip the motion
2143    * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
2144    * candidate.
2145    */
2146   int skip_motion_mode;
2147   /*!
2148    * Total RD cost for this candidate.
2149    */
2150   int64_t rd_cost;
2151 } motion_mode_candidate;
2152 
2153 /*!\cond */
2154 typedef struct motion_mode_best_st_candidate {
2155   motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
2156   int num_motion_mode_cand;
2157 } motion_mode_best_st_candidate;
2158 
2159 // Checks if any of the current block's reference frames matches the
2160 // neighbouring (top/left) block's reference frames
2161 static inline int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2162                                                MB_MODE_INFO *nb_mbmi) {
2163   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2164                                           nb_mbmi->ref_frame[1] };
2165   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2166                                            cur_mbmi->ref_frame[1] };
2167   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2168   int match_found = 0;
2169 
2170   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2171     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2172         (cur_ref_frames[i] == nb_ref_frames[1]))
2173       match_found = 1;
2174   }
2175   return match_found;
2176 }
2177 
2178 static inline int find_ref_match_in_above_nbs(const int total_mi_cols,
2179                                               MACROBLOCKD *xd) {
2180   if (!xd->up_available) return 1;
2181   const int mi_col = xd->mi_col;
2182   MB_MODE_INFO **cur_mbmi = xd->mi;
2183   // prev_row_mi points into the mi array, starting at the beginning of the
2184   // previous row.
2185   MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2186   const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2187   uint8_t mi_step;
2188   for (int above_mi_col = mi_col; above_mi_col < end_col;
2189        above_mi_col += mi_step) {
2190     MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2191     mi_step = mi_size_wide[above_mi[0]->bsize];
2192     int match_found = 0;
2193     if (is_inter_block(*above_mi))
2194       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2195     if (match_found) return 1;
2196   }
2197   return 0;
2198 }
2199 
2200 static inline int find_ref_match_in_left_nbs(const int total_mi_rows,
2201                                              MACROBLOCKD *xd) {
2202   if (!xd->left_available) return 1;
2203   const int mi_row = xd->mi_row;
2204   MB_MODE_INFO **cur_mbmi = xd->mi;
2205   // prev_col_mi points into the mi array, starting at the top of the
2206   // previous column
2207   MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2208   const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2209   uint8_t mi_step;
2210   for (int left_mi_row = mi_row; left_mi_row < end_row;
2211        left_mi_row += mi_step) {
2212     MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2213     mi_step = mi_size_high[left_mi[0]->bsize];
2214     int match_found = 0;
2215     if (is_inter_block(*left_mi))
2216       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2217     if (match_found) return 1;
2218   }
2219   return 0;
2220 }
2221 /*!\endcond */
2222 
2223 /*! \brief Struct used to hold TPL data to
2224  * narrow down parts of the inter mode search.
2225  */
2226 typedef struct {
2227   /*!
2228    * The best inter cost out of all of the reference frames.
2229    */
2230   int64_t best_inter_cost;
2231   /*!
2232    * The inter cost for each reference frame.
2233    */
2234   int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
2235 } PruneInfoFromTpl;
2236 
2237 #if !CONFIG_REALTIME_ONLY
2238 // TODO(Remya): Check if get_tpl_stats_b() can be reused
2239 static inline void get_block_level_tpl_stats(
2240     AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2241     PruneInfoFromTpl *inter_cost_info_from_tpl) {
2242   AV1_COMMON *const cm = &cpi->common;
2243 
2244   assert(IMPLIES(cpi->ppi->gf_group.size > 0,
2245                  cpi->gf_frame_index < cpi->ppi->gf_group.size));
2246   const int tpl_idx = cpi->gf_frame_index;
2247   TplParams *const tpl_data = &cpi->ppi->tpl_data;
2248   if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
2249   const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2250   const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2251   const int mi_wide = mi_size_wide[bsize];
2252   const int mi_high = mi_size_high[bsize];
2253   const int tpl_stride = tpl_frame->stride;
2254   const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2255   const int mi_col_sr =
2256       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2257   const int mi_col_end_sr =
2258       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2259   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2260 
2261   const int row_step = step;
2262   const int col_step_sr =
2263       coded_to_superres_mi(step, cm->superres_scale_denominator);
2264   for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2265        row += row_step) {
2266     for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2267          col += col_step_sr) {
2268       const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2269           row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2270 
2271       // Sums up the inter cost of corresponding ref frames
2272       for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2273         inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2274             this_stats->pred_error[ref_idx];
2275       }
2276     }
2277   }
2278 
2279   // Computes the best inter cost (minimum inter_cost)
2280   int64_t best_inter_cost = INT64_MAX;
2281   for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2282     const int64_t cur_inter_cost =
2283         inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2284     // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2285     // calculating the minimum inter_cost
2286     if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2287         valid_refs[ref_idx])
2288       best_inter_cost = cur_inter_cost;
2289   }
2290   inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2291 }
2292 #endif
2293 
2294 static inline int prune_modes_based_on_tpl_stats(
2295     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2296     const PREDICTION_MODE this_mode, int prune_mode_level) {
2297   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2298   if ((prune_mode_level < 2) && have_newmv) return 0;
2299 
2300   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2301   if (best_inter_cost == INT64_MAX) return 0;
2302 
2303   const int prune_level = prune_mode_level - 1;
2304   int64_t cur_inter_cost;
2305 
2306   const int is_globalmv =
2307       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2308   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2309 
2310   // Thresholds used for pruning:
2311   // A lower value indicates more aggressive pruning and a higher value more
2312   // conservative pruning; the threshold is chosen based on ref_mv_idx and the
2313   // speed feature. 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and
2314   // 2; prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV.
2315   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2316     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2317   };
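  // Illustrative reading of the table above (not from the original source):
  // because the comparison below right-shifts the product by 2, a factor of 6
  // prunes modes whose cost exceeds 1.5 * best_inter_cost, 5 corresponds to
  // 1.25x, and 4 to 1.0x.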
2318 
2319   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2320   if (!is_comp_pred) {
2321     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2322   } else {
2323     const int64_t inter_cost_ref0 =
2324         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2325     const int64_t inter_cost_ref1 =
2326         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2327     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2328     // more aggressive pruning
2329     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2330   }
2331 
2332   // Prune the mode if cur_inter_cost is greater than threshold times
2333   // best_inter_cost
2334   if (cur_inter_cost >
2335       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2336         best_inter_cost) >>
2337        2))
2338     return 1;
2339   return 0;
2340 }
2341 
2342 /*!\brief High level function to select parameters for compound mode.
2343  *
2344  * \ingroup inter_mode_search
2345  * The main search functionality is done in the call to av1_compound_type_rd().
2346  *
2347  * \param[in]     cpi               Top-level encoder structure.
2348  * \param[in]     x                 Pointer to struct holding all the data for
2349  *                                  the current macroblock.
2350  * \param[in]     args              HandleInterModeArgs struct holding
2351  *                                  miscellaneous arguments for inter mode
2352  *                                  search. See the documentation for this
2353  *                                  struct for a description of each member.
2354  * \param[in]     ref_best_rd       Best RD found so far for this block.
2355  *                                  It is used for early termination of this
2356  *                                  search if the RD exceeds this value.
2357  * \param[in,out] cur_mv            Current motion vector.
2358  * \param[in]     bsize             Current block size.
2359  * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2360  *                                  compound mode.
2361  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2362  *                                  allocated buffers for the compound
2363  *                                  predictors and masks in the compound type
2364  *                                  search.
2365  * \param[in,out] orig_dst          A prediction buffer to hold a computed
2366  *                                  prediction. This will eventually hold the
2367  *                                  final prediction, and the tmp_dst info will
2368  *                                  be copied here.
2369  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2370  *                                  computed prediction.
2371  * \param[in,out] rate_mv           The rate associated with the motion vectors.
2372  *                                  This will be modified if a motion search is
2373  *                                  done in the motion mode search.
2374  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2375  *                                  information.
2376  * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2377  *                                  best total RD for a skip mode so far, and
2378  *                                  skip_rd[1] is the best RD for a skip mode so
2379  *                                  far in luma. This is used as a speed feature
2380  *                                  to skip the transform search if the computed
2381  *                                  skip RD for the current mode is not better
2382  *                                  than the best skip_rd so far.
2383  * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2384  *                                  predictor. If this is 0, the inter predictor
2385  *                                  has already been built and thus we can avoid
2386  *                                  repeating computation.
2387  * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2388  * a viable candidate.
2389  */
2390 static int process_compound_inter_mode(
2391     AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
2392     int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
2393     int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
2394     const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
2395     RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
2396   MACROBLOCKD *xd = &x->e_mbd;
2397   MB_MODE_INFO *mbmi = xd->mi[0];
2398   const AV1_COMMON *cm = &cpi->common;
2399   const int masked_compound_used = is_any_masked_compound_used(bsize) &&
2400                                    cm->seq_params->enable_masked_compound;
2401   int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
2402                          (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
2403 
2404   const int num_planes = av1_num_planes(cm);
2405   const int mi_row = xd->mi_row;
2406   const int mi_col = xd->mi_col;
2407   int is_luma_interp_done = 0;
2408   set_default_interp_filters(mbmi, cm->features.interp_filter);
2409 
2410   int64_t best_rd_compound;
2411   int64_t rd_thresh;
2412   const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
2413   const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
2414   rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
2415                                          comp_type_rd_scale);
2416   // Select compound type and any parameters related to that type
2417   // (for example, the mask parameters if it is a masked mode) and compute
2418   // the RD
2419   *compmode_interinter_cost = av1_compound_type_rd(
2420       cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
2421       orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
2422       ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
2423   if (ref_best_rd < INT64_MAX &&
2424       (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
2425           ref_best_rd) {
2426     restore_dst_buf(xd, *orig_dst, num_planes);
2427     return 1;
2428   }
2429 
2430   // Build only uv predictor for COMPOUND_AVERAGE.
2431   // Note there is no need to call av1_enc_build_inter_predictor
2432   // for luma if COMPOUND_AVERAGE is selected because it is the first
2433   // candidate in av1_compound_type_rd, which means it used the dst_buf
2434   // rather than the tmp_buf.
2435   if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
2436     if (num_planes > 1) {
2437       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
2438                                     AOM_PLANE_U, num_planes - 1);
2439     }
2440     *skip_build_pred = 1;
2441   }
2442   return 0;
2443 }
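
// Illustrative sketch (not part of libaom): the early-exit test above scales
// best_rd_compound by COMP_TYPE_RD_THRESH_SCALE / (1 << COMP_TYPE_RD_THRESH_SHIFT)
// and abandons the compound mode once that scaled cost already exceeds the best
// RD seen so far. The helper name below is hypothetical and only restates that
// integer comparison; the real shift/scale constants are defined elsewhere in
// the encoder.
static inline int example_compound_rd_exceeds_budget(int64_t best_rd_compound,
                                                     int64_t ref_best_rd,
                                                     int shift, int scale) {
  // No meaningful budget yet, so nothing can be pruned against it.
  if (ref_best_rd == INT64_MAX) return 0;
  // Integer-only equivalent of: best_rd_compound * scale / 2^shift > ref_best_rd.
  return (best_rd_compound >> shift) * scale > ref_best_rd;
}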
2444 
2445 // Speed feature to prune out MVs that are similar to previous MVs if they
2446 // don't achieve the best RD advantage.
2447 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2448                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2449                                    MB_MODE_INFO *mbmi, int pruning_factor) {
2450   int i;
2451   const int is_comp_pred = has_second_ref(mbmi);
2452   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2453 
2454   // Skip the evaluation if an MV match is found.
2455   if (ref_mv_idx > 0) {
2456     for (int idx = 0; idx < ref_mv_idx; ++idx) {
2457       if (save_mv[idx][0].as_int == INVALID_MV) continue;
2458 
2459       int mv_diff = 0;
2460       for (i = 0; i < 1 + is_comp_pred; ++i) {
2461         mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2462                    abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2463       }
2464 
2465       // If this mode is not the best one, and current MV is similar to
2466       // previous stored MV, terminate this ref_mv_idx evaluation.
2467       if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2468     }
2469   }
2470 
2471   if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2472     for (i = 0; i < is_comp_pred + 1; ++i)
2473       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2474   }
2475 
2476   return 0;
2477 }
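
// Illustrative sketch (not part of libaom): the pruning above compares the
// summed |row| + |col| difference (in eighth-pel units) between the current
// MVs and each previously saved set of MVs against the threshold
// (1 + is_comp_pred) << (pruning_factor + 1), and drops the current
// ref_mv_idx when no earlier index has become the best so far. For example,
// with compound prediction (is_comp_pred == 1) and pruning_factor == 1 the
// threshold is 2 << 2 = 8 eighth-pel units summed over both references.
// The helper below is hypothetical and only restates the per-MV distance term.
static inline int example_mv_l1_distance(MV a, MV b) {
  return abs(a.row - b.row) + abs(a.col - b.col);
}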
2478 
2479 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2480  *
2481  * \ingroup inter_mode_search
2482  *
2483  * Compares the sse of the zero mv and the best sse found among the single
2484  * new_mv modes. If the sse of the zero_mv is higher, returns 1 to signal that
2485  * zero_mv can be skipped. Otherwise returns 0.
2486  *
2487  * Note that the sse here comes from single_motion_search, so it is computed
2488  * with the interpolation filter used in motion search, not the actual
2489  * interpolation filter used in encoding.
2490  *
2491  * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2492  * \param[in]     x                 Pointer to struct holding all the data for
2493  *                                  the current macroblock.
2494  * \param[in]     bsize             The current block_size.
2495  * \param[in]     args              The args to handle_inter_mode, used to track
2496  *                                  the best SSE.
2497  * \param[in]     prune_zero_mv_with_sse  The value of the speed feature
2498  *                                         prune_zero_mv_with_sse.
2499  * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2500  */
2501 static inline int prune_zero_mv_with_sse(const aom_variance_fn_ptr_t *fn_ptr,
2502                                          const MACROBLOCK *x, BLOCK_SIZE bsize,
2503                                          const HandleInterModeArgs *args,
2504                                          int prune_zero_mv_with_sse) {
2505   const MACROBLOCKD *xd = &x->e_mbd;
2506   const MB_MODE_INFO *mbmi = xd->mi[0];
2507 
2508   const int is_comp_pred = has_second_ref(mbmi);
2509   const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2510 
2511   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2512     if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2513       // Pruning logic only works for IDENTITY type models
2514       // Note: In theory we could apply similar logic for TRANSLATION
2515       // type models, but we do not code these due to a spec bug
2516       // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2517       assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2518       return 0;
2519     }
2520 
2521     // Don't prune if we have invalid data
2522     assert(mbmi->mv[idx].as_int == 0);
2523     if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2524       return 0;
2525     }
2526   }
2527 
2528   // Sum up the sse of ZEROMV and best NEWMV
2529   unsigned int this_sse_sum = 0;
2530   unsigned int best_sse_sum = 0;
2531   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2532     const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2533     const struct macroblockd_plane *pd = xd->plane;
2534     const struct buf_2d *src_buf = &p->src;
2535     const struct buf_2d *ref_buf = &pd->pre[idx];
2536     const uint8_t *src = src_buf->buf;
2537     const uint8_t *ref = ref_buf->buf;
2538     const int src_stride = src_buf->stride;
2539     const int ref_stride = ref_buf->stride;
2540 
2541     unsigned int this_sse;
2542     fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2543     this_sse_sum += this_sse;
2544 
2545     const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2546     best_sse_sum += best_sse;
2547   }
2548 
2549   const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2550   if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2551     return 1;
2552   }
2553 
2554   return 0;
2555 }
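
// Illustrative sketch (not part of libaom): the decision above reduces to
// comparing the summed ZEROMV SSE against the summed best-NEWMV SSE with a
// tolerance that depends on the speed-feature level: level 1 keeps ZEROMV
// unless its SSE is more than 25% worse, while level 2 and above prune as soon
// as ZEROMV is worse at all. The helper name is hypothetical.
static inline int example_should_prune_zero_mv(unsigned int zero_mv_sse_sum,
                                               unsigned int best_newmv_sse_sum,
                                               int sf_level) {
  const double mul = sf_level > 1 ? 1.00 : 1.25;
  return (double)zero_mv_sse_sum > mul * (double)best_newmv_sse_sum;
}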
2556 
2557 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2558  *
2559  * \ingroup inter_mode_search
2560  *
2561  * Does a simple interpolation filter search during winner mode evaluation. This
2562  * is currently only used by realtime mode as \ref
2563  * av1_interpolation_filter_search is not called during realtime encoding.
2564  *
2565  * This function only searches over two possible filters. EIGHTTAP_REGULAR is
2566  * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
2567  * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
2568  *
2569  * \param[in]     cpi               Pointer to the compressor. Used for feature
2570  *                                  flags.
2571  * \param[in,out] x                 Pointer to macroblock. This is primarily
2572  *                                  used to access the buffers.
2573  * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2574  * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2575  * \param[in]     bsize             The current block_size.
2576  * \return Returns true if a predictor is built in xd->dst, false otherwise.
2577  */
2578 static inline bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
2579                                       int mi_row, int mi_col,
2580                                       BLOCK_SIZE bsize) {
2581   static const InterpFilters filters_ref_set[3] = {
2582     { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
2583     { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
2584     { MULTITAP_SHARP, MULTITAP_SHARP }
2585   };
2586 
2587   const AV1_COMMON *const cm = &cpi->common;
2588   MACROBLOCKD *const xd = &x->e_mbd;
2589   MB_MODE_INFO *const mi = xd->mi[0];
2590   int64_t best_cost = INT64_MAX;
2591   int best_filter_index = -1;
2592   // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best one so far.
2593   const int num_planes = av1_num_planes(cm);
2594   const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
2595   assert(is_inter_mode(mi->mode));
2596   assert(mi->motion_mode == SIMPLE_TRANSLATION);
2597   assert(!is_inter_compound_mode(mi->mode));
2598 
2599   if (!av1_is_interp_needed(xd)) {
2600     return false;
2601   }
2602 
2603   struct macroblockd_plane *pd = xd->plane;
2604   const BUFFER_SET orig_dst = {
2605     { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2606     { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2607   };
2608   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
2609   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2610                                  tmp_buf + 2 * MAX_SB_SQUARE },
2611                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2612   const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };
2613 
2614   for (int i = 0; i < 3; ++i) {
2615     if (is_240p_or_lesser) {
2616       if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
2617         continue;
2618       }
2619     } else {
2620       if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
2621         continue;
2622       }
2623     }
2624     int64_t cost;
2625     RD_STATS tmp_rd = { 0 };
2626 
2627     mi->interp_filters.as_filters = filters_ref_set[i];
2628     av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
2629 
2630     model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
2631                        ? MODELRD_LEGACY
2632                        : MODELRD_TYPE_INTERP_FILTER](
2633         cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
2634         &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);
2635 
2636     tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2637                                            cm->seq_params->enable_dual_filter);
2638     cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
2639     if (cost < best_cost) {
2640       best_filter_index = i;
2641       best_cost = cost;
2642       swap_dst_buf(xd, dst_bufs, num_planes);
2643     }
2644   }
2645   assert(best_filter_index >= 0);
2646 
2647   mi->interp_filters.as_filters = filters_ref_set[best_filter_index];
2648 
2649   const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];
2650 
2651   if (is_best_pred_in_orig) {
2652     swap_dst_buf(xd, dst_bufs, num_planes);
2653   } else {
2654     // Note that xd->plane's dst buffers are kept in sync with dst_bufs[0].
2655     // So if is_best_pred_in_orig is false, that means the current buffer is
2656     // the original one.
2657     assert(&orig_dst == dst_bufs[0]);
2658     assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
2659     const int width = block_size_wide[bsize];
2660     const int height = block_size_high[bsize];
2661 #if CONFIG_AV1_HIGHBITDEPTH
2662     const bool is_hbd = is_cur_buf_hbd(xd);
2663     if (is_hbd) {
2664       aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
2665                                tmp_dst.stride[AOM_PLANE_Y],
2666                                CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
2667                                orig_dst.stride[AOM_PLANE_Y], width, height);
2668     } else {
2669       aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2670                         orig_dst.plane[AOM_PLANE_Y],
2671                         orig_dst.stride[AOM_PLANE_Y], width, height);
2672     }
2673 #else
2674     aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
2675                       orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
2676                       width, height);
2677 #endif
2678   }
2679 
2680   // Build the chroma (U and V) predictors; the luma predictor is already in place.
2681   if (num_planes > 1) {
2682     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
2683                                   AOM_PLANE_U, AOM_PLANE_V);
2684   }
2685 
2686   return true;
2687 }
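
// Illustrative sketch (not part of libaom): fast_interp_search() effectively
// evaluates just two filter pairs per block (EIGHTTAP_REGULAR plus a
// resolution-dependent alternative) and keeps whichever has the smaller
// RDCOST of the modeled rate and distortion, where the rate also includes the
// switchable-filter signaling cost. The hypothetical helper below restates
// that final selection.
static inline int example_pick_filter_by_rd(int64_t rd_cost_regular,
                                            int64_t rd_cost_alternative) {
  // Returns 0 for EIGHTTAP_REGULAR, 1 for the alternative filter
  // (MULTITAP_SHARP at <= 240p, EIGHTTAP_SMOOTH at higher resolutions).
  return rd_cost_alternative < rd_cost_regular ? 1 : 0;
}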
2688 
2689 /*!\brief AV1 inter mode RD computation
2690  *
2691  * \ingroup inter_mode_search
2692  * Do the RD search for a given inter mode and compute all information relevant
2693  * to the input mode. It will compute the best MV,
2694  * compound parameters (if the mode is a compound mode) and interpolation filter
2695  * parameters.
2696  *
2697  * \param[in]     cpi               Top-level encoder structure.
2698  * \param[in]     tile_data         Pointer to struct holding adaptive
2699  *                                  data/contexts/models for the tile during
2700  *                                  encoding.
2701  * \param[in]     x                 Pointer to structure holding all the data
2702  *                                  for the current macroblock.
2703  * \param[in]     bsize             Current block size.
2704  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2705  *                                  information.
2706  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2707  *                                  for only the Y plane.
2708  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2709  *                                  for only the UV planes.
2710  * \param[in]     args              HandleInterModeArgs struct holding
2711  *                                  miscellaneous arguments for inter mode
2712  *                                  search. See the documentation for this
2713  *                                  struct for a description of each member.
2714  * \param[in]     ref_best_rd       Best RD found so far for this block.
2715  *                                  It is used for early termination of this
2716  *                                  search if the RD exceeds this value.
2717  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2718  *                                  built in this search.
2719  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2720  *                                  allocated buffers for the compound
2721  *                                  predictors and masks in the compound type
2722  *                                  search.
2723  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2724  *                                  do_tx_search (see below) is 0.
2725  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2726  *                                  a full transform search. This will compute
2727  *                                  an estimated RD for the modes without the
2728  *                                  transform search and later perform the full
2729  *                                  transform search on the best candidates.
2730  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2731  *                                  information to perform a full transform
2732  *                                  search only on winning candidates searched
2733  *                                  with an estimate for transform coding RD.
2734  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2735  *                                  motion mode information used in a speed
2736  *                                  feature to search motion modes other than
2737  *                                  SIMPLE_TRANSLATION only on winning
2738  *                                  candidates.
2739  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2740  *                                  best total RD for a skip mode so far, and
2741  *                                  skip_rd[1] is the best RD for a skip mode so
2742  *                                  far in luma. This is used as a speed feature
2743  *                                  to skip the transform search if the computed
2744  *                                  skip RD for the current mode is not better
2745  *                                  than the best skip_rd so far.
2746  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2747  *                                         narrow down the search based on data
2748  *                                         collected in the TPL model.
2749  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2750  *                                  the luma plane.
2751  *
2752  * \return The RD cost for the mode being searched.
2753  */
2754 static int64_t handle_inter_mode(
2755     AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2756     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2757     RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2758     uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2759     int64_t *best_est_rd, const int do_tx_search,
2760     InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2761     int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2762     int64_t *yrd) {
2763   const AV1_COMMON *cm = &cpi->common;
2764   const int num_planes = av1_num_planes(cm);
2765   MACROBLOCKD *xd = &x->e_mbd;
2766   MB_MODE_INFO *mbmi = xd->mi[0];
2767   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2768   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2769   const int is_comp_pred = has_second_ref(mbmi);
2770   const PREDICTION_MODE this_mode = mbmi->mode;
2771 
2772 #if CONFIG_REALTIME_ONLY
2773   const int prune_modes_based_on_tpl = 0;
2774 #else   // CONFIG_REALTIME_ONLY
2775   const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2776   const int prune_modes_based_on_tpl =
2777       cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2778       av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2779 #endif  // CONFIG_REALTIME_ONLY
2780   int i;
2781   // Reference frames for this mode
2782   const int refs[2] = { mbmi->ref_frame[0],
2783                         (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2784   int rate_mv = 0;
2785   int64_t rd = INT64_MAX;
2786   // Do first prediction into the destination buffer. Do the next
2787   // prediction into a temporary buffer. Then keep track of which one
2788   // of these currently holds the best predictor, and use the other
2789   // one for future predictions. In the end, copy from tmp_buf to
2790   // dst if necessary.
2791   struct macroblockd_plane *pd = xd->plane;
2792   const BUFFER_SET orig_dst = {
2793     { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2794     { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2795   };
2796   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2797                                  tmp_buf + 2 * MAX_SB_SQUARE },
2798                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2799 
2800   int64_t ret_val = INT64_MAX;
2801   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2802   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2803   int64_t best_rd = INT64_MAX;
2804   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2805   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2806   int64_t best_yrd = INT64_MAX;
2807   MB_MODE_INFO best_mbmi = *mbmi;
2808   int best_xskip_txfm = 0;
2809   int64_t newmv_ret_val = INT64_MAX;
2810   inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2811 
2812   // Do not prune the mode based on inter cost from tpl if the current ref frame
2813   // is the winner ref in neighbouring blocks.
2814   int ref_match_found_in_above_nb = 0;
2815   int ref_match_found_in_left_nb = 0;
2816   if (prune_modes_based_on_tpl) {
2817     ref_match_found_in_above_nb =
2818         find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2819     ref_match_found_in_left_nb =
2820         find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2821   }
2822 
2823   // First, perform a simple translation search for each of the indices. If
2824   // an index performs well, it will be fully searched in the main loop
2825   // of this function.
2826   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2827   // Save MV results from first 2 ref_mv_idx.
2828   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2829   int best_ref_mv_idx = -1;
2830   const int idx_mask =
2831       ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2832   const int16_t mode_ctx =
2833       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2834   const ModeCosts *mode_costs = &x->mode_costs;
2835   const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2836   const int base_rate =
2837       args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2838 
2839   for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2840     save_mv[i][0].as_int = INVALID_MV;
2841     save_mv[i][1].as_int = INVALID_MV;
2842   }
2843   args->start_mv_cnt = 0;
2844 
2845   // Main loop of this function. This will iterate over all of the ref mvs
2846   // in the dynamic reference list and do the following:
2847   //    1.) Get the current MV. Create newmv MV if necessary
2848   //    2.) Search compound type and parameters if applicable
2849   //    3.) Do interpolation filter search
2850   //    4.) Build the inter predictor
2851   //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2852   //        WARPED_CAUSAL)
2853   //    6.) Update stats if best so far
2854   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2855     mbmi->ref_mv_idx = ref_mv_idx;
2856 
2857     mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2858     mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2859     const int drl_cost = get_drl_cost(
2860         mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2861     mode_info[ref_mv_idx].drl_cost = drl_cost;
2862     mode_info[ref_mv_idx].skip = 0;
2863 
2864     if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2865       // MV did not perform well in simple translation search. Skip it.
2866       continue;
2867     }
2868     if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2869         !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2870       // Skip mode if TPL model indicates it will not be beneficial.
2871       if (prune_modes_based_on_tpl_stats(
2872               inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2873               cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2874         continue;
2875     }
2876     av1_init_rd_stats(rd_stats);
2877 
2878     // Initialize compound mode data
2879     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2880     mbmi->comp_group_idx = 0;
2881     mbmi->compound_idx = 1;
2882     if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2883 
2884     mbmi->num_proj_ref = 0;
2885     mbmi->motion_mode = SIMPLE_TRANSLATION;
2886 
2887     // Compute cost for signalling this DRL index
2888     rd_stats->rate = base_rate;
2889     rd_stats->rate += drl_cost;
2890 
2891     int rs = 0;
2892     int compmode_interinter_cost = 0;
2893 
2894     int_mv cur_mv[2];
2895 
2896     // TODO(Cherma): Extend this speed feature to support compound mode
2897     int skip_repeated_ref_mv =
2898         is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2899     // Generate the current mv according to the prediction mode
2900     if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2901       continue;
2902     }
2903 
2904     // The above call to build_cur_mv does not handle NEWMV modes. Build
2905     // the mv here if we have NEWMV for any predictors.
2906     if (have_newmv_in_inter_mode(this_mode)) {
2907 #if CONFIG_COLLECT_COMPONENT_TIMING
2908       start_timing(cpi, handle_newmv_time);
2909 #endif
2910       newmv_ret_val =
2911           handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2912 #if CONFIG_COLLECT_COMPONENT_TIMING
2913       end_timing(cpi, handle_newmv_time);
2914 #endif
2915 
2916       if (newmv_ret_val != 0) continue;
2917 
2918       if (is_inter_singleref_mode(this_mode) &&
2919           cur_mv[0].as_int != INVALID_MV) {
2920         const MV_REFERENCE_FRAME ref = refs[0];
2921         const unsigned int this_sse = x->pred_sse[ref];
2922         if (this_sse < args->best_single_sse_in_refs[ref]) {
2923           args->best_single_sse_in_refs[ref] = this_sse;
2924         }
2925 
2926         if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
2927           const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
2928           const int pix_idx = num_pels_log2_lookup[bsize] - 4;
2929           const double scale_factor[3][11] = {
2930             { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
2931             { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
2932             { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
2933           };
2934           assert(pix_idx >= 0);
2935           assert(th_idx <= 2);
2936           if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
2937             continue;
2938         }
2939       }
2940 
2941       rd_stats->rate += rate_mv;
2942     }
2943     // Copy the motion vector for this mode into mbmi struct
2944     for (i = 0; i < is_comp_pred + 1; ++i) {
2945       mbmi->mv[i].as_int = cur_mv[i].as_int;
2946     }
2947 
2948     if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2949         mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2950       continue;
2951     }
2952 
2953     // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2954     // is enabled, and the current MV is similar to a previous one.
2955     if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2956         prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2957                                 cpi->sf.inter_sf.prune_ref_mv_idx_search))
2958       continue;
2959 
2960     if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2961         (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2962       if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
2963                                  cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
2964         continue;
2965       }
2966     }
2967 
2968     int skip_build_pred = 0;
2969     const int mi_row = xd->mi_row;
2970     const int mi_col = xd->mi_col;
2971 
2972     // Handle a compound predictor, continue if it is determined this
2973     // cannot be the best compound mode
2974     if (is_comp_pred) {
2975 #if CONFIG_COLLECT_COMPONENT_TIMING
2976       start_timing(cpi, compound_type_rd_time);
2977 #endif
2978       const int not_best_mode = process_compound_inter_mode(
2979           cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2980           rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2981           &skip_build_pred);
2982 #if CONFIG_COLLECT_COMPONENT_TIMING
2983       end_timing(cpi, compound_type_rd_time);
2984 #endif
2985       if (not_best_mode) continue;
2986     }
2987 
2988     if (!args->skip_ifs) {
2989 #if CONFIG_COLLECT_COMPONENT_TIMING
2990       start_timing(cpi, interpolation_filter_search_time);
2991 #endif
2992       // Determine the interpolation filter for this mode
2993       ret_val = av1_interpolation_filter_search(
2994           x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2995           &skip_build_pred, args, ref_best_rd);
2996 #if CONFIG_COLLECT_COMPONENT_TIMING
2997       end_timing(cpi, interpolation_filter_search_time);
2998 #endif
2999       if (args->modelled_rd != NULL && !is_comp_pred) {
3000         args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
3001       }
3002       if (ret_val != 0) {
3003         restore_dst_buf(xd, orig_dst, num_planes);
3004         continue;
3005       } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
3006                  ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
3007         restore_dst_buf(xd, orig_dst, num_planes);
3008         continue;
3009       }
3010 
3011       // Compute modelled RD if enabled
3012       if (args->modelled_rd != NULL) {
3013         if (is_comp_pred) {
3014           const int mode0 = compound_ref0_mode(this_mode);
3015           const int mode1 = compound_ref1_mode(this_mode);
3016           const int64_t mrd =
3017               AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
3018                      args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
3019           if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
3020             restore_dst_buf(xd, orig_dst, num_planes);
3021             continue;
3022           }
3023         }
3024       }
3025     }
3026 
3027     rd_stats->rate += compmode_interinter_cost;
3028     if (skip_build_pred != 1) {
3029       // Build this inter predictor if it has not been previously built
3030       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
3031                                     av1_num_planes(cm) - 1);
3032     }
3033 
3034 #if CONFIG_COLLECT_COMPONENT_TIMING
3035     start_timing(cpi, motion_mode_rd_time);
3036 #endif
3037     int rate2_nocoeff = rd_stats->rate;
3038     // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
3039     // OBMC_CAUSAL or WARPED_CAUSAL
3040     int64_t this_yrd;
3041     ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
3042                              rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
3043                              &orig_dst, best_est_rd, do_tx_search,
3044                              inter_modes_info, 0, &this_yrd);
3045 #if CONFIG_COLLECT_COMPONENT_TIMING
3046     end_timing(cpi, motion_mode_rd_time);
3047 #endif
3048     assert(
3049         IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
3050 
3051     if (ret_val != INT64_MAX) {
3052       int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3053       const THR_MODES mode_enum = get_prediction_mode_idx(
3054           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3055       // Collect mode stats for multiwinner mode processing
3056       store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
3057                               rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
3058                               cpi->sf.winner_mode_sf.multi_winner_mode_type,
3059                               do_tx_search);
3060       if (tmp_rd < best_rd) {
3061         best_yrd = this_yrd;
3062         // Update the best rd stats if we found the best mode so far
3063         best_rd_stats = *rd_stats;
3064         best_rd_stats_y = *rd_stats_y;
3065         best_rd_stats_uv = *rd_stats_uv;
3066         best_rd = tmp_rd;
3067         best_mbmi = *mbmi;
3068         best_xskip_txfm = txfm_info->skip_txfm;
3069         memcpy(best_blk_skip, txfm_info->blk_skip,
3070                sizeof(best_blk_skip[0]) * xd->height * xd->width);
3071         av1_copy_array(best_tx_type_map, xd->tx_type_map,
3072                        xd->height * xd->width);
3073         motion_mode_cand->rate_mv = rate_mv;
3074         motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
3075       }
3076 
3077       if (tmp_rd < ref_best_rd) {
3078         ref_best_rd = tmp_rd;
3079         best_ref_mv_idx = ref_mv_idx;
3080       }
3081     }
3082     restore_dst_buf(xd, orig_dst, num_planes);
3083   }
3084 
3085   if (best_rd == INT64_MAX) return INT64_MAX;
3086 
3087   // Re-instate the status of the best choice.
3088   *rd_stats = best_rd_stats;
3089   *rd_stats_y = best_rd_stats_y;
3090   *rd_stats_uv = best_rd_stats_uv;
3091   *yrd = best_yrd;
3092   *mbmi = best_mbmi;
3093   txfm_info->skip_txfm = best_xskip_txfm;
3094   assert(IMPLIES(mbmi->comp_group_idx == 1,
3095                  mbmi->interinter_comp.type != COMPOUND_AVERAGE));
3096   memcpy(txfm_info->blk_skip, best_blk_skip,
3097          sizeof(best_blk_skip[0]) * xd->height * xd->width);
3098   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
3099 
3100   rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
3101 
3102   return rd_stats->rdcost;
3103 }
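
// Illustrative sketch (not part of libaom): every comparison in the loop above
// boils down to a Lagrangian cost of the form rate * lambda + distortion,
// which is what the RDCOST() macro computes (the real macro applies
// fixed-point scaling to the terms). The hypothetical helper below shows the
// same idea with plain arithmetic; a candidate only replaces the current best
// when its combined cost is smaller.
static inline int64_t example_lagrangian_cost(int64_t rate, int64_t dist,
                                              int64_t lambda_fp,
                                              int lambda_shift) {
  return ((rate * lambda_fp) >> lambda_shift) + dist;
}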
3104 
3105 /*!\brief Search for the best intrabc predictor
3106  *
3107  * \ingroup intra_mode_search
3108  * \callergraph
3109  * This function performs a motion search to find the best intrabc predictor.
3110  *
3111  * \returns Returns the best overall rdcost (including the non-intrabc modes
3112  * search before this function).
3113  */
3114 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
3115                                        PICK_MODE_CONTEXT *ctx,
3116                                        RD_STATS *rd_stats, BLOCK_SIZE bsize,
3117                                        int64_t best_rd) {
3118   const AV1_COMMON *const cm = &cpi->common;
3119   if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
3120       !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
3121     return INT64_MAX;
3122   const int num_planes = av1_num_planes(cm);
3123 
3124   MACROBLOCKD *const xd = &x->e_mbd;
3125   const TileInfo *tile = &xd->tile;
3126   MB_MODE_INFO *mbmi = xd->mi[0];
3127   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3128 
3129   const int mi_row = xd->mi_row;
3130   const int mi_col = xd->mi_col;
3131   const int w = block_size_wide[bsize];
3132   const int h = block_size_high[bsize];
3133   const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
3134   const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
3135 
3136   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3137   const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
3138   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
3139                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3140                    mbmi_ext->mode_context);
3141   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3142   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3143   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
3144   int_mv nearestmv, nearmv;
3145   av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
3146                                    0);
3147 
3148   if (nearestmv.as_int == INVALID_MV) {
3149     nearestmv.as_int = 0;
3150   }
3151   if (nearmv.as_int == INVALID_MV) {
3152     nearmv.as_int = 0;
3153   }
3154 
3155   int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
3156   if (dv_ref.as_int == 0) {
3157     av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
3158   }
3159   // Ref DV should not have sub-pel.
3160   assert((dv_ref.as_mv.col & 7) == 0);
3161   assert((dv_ref.as_mv.row & 7) == 0);
3162   mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
3163 
3164   struct buf_2d yv12_mb[MAX_MB_PLANE];
3165   av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
3166   for (int i = 0; i < num_planes; ++i) {
3167     xd->plane[i].pre[0] = yv12_mb[i];
3168   }
3169 
3170   enum IntrabcMotionDirection {
3171     IBC_MOTION_ABOVE,
3172     IBC_MOTION_LEFT,
3173     IBC_MOTION_DIRECTIONS
3174   };
3175 
3176   MB_MODE_INFO best_mbmi = *mbmi;
3177   RD_STATS best_rdstats = *rd_stats;
3178   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
3179   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
3180   av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3181 
3182   FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
3183   const SEARCH_METHODS search_method =
3184       av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
3185   const search_site_config *lookahead_search_sites =
3186       cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
3187   const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3188   av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
3189                                      &dv_ref.as_mv, start_mv,
3190                                      lookahead_search_sites, search_method,
3191                                      /*fine_search_interval=*/0);
3192   const IntraBCMVCosts *const dv_costs = x->dv_costs;
3193   av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
3194 
3195   for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
3196        dir < IBC_MOTION_DIRECTIONS; ++dir) {
3197     switch (dir) {
3198       case IBC_MOTION_ABOVE:
3199         fullms_params.mv_limits.col_min =
3200             (tile->mi_col_start - mi_col) * MI_SIZE;
3201         fullms_params.mv_limits.col_max =
3202             (tile->mi_col_end - mi_col) * MI_SIZE - w;
3203         fullms_params.mv_limits.row_min =
3204             (tile->mi_row_start - mi_row) * MI_SIZE;
3205         fullms_params.mv_limits.row_max =
3206             (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3207         break;
3208       case IBC_MOTION_LEFT:
3209         fullms_params.mv_limits.col_min =
3210             (tile->mi_col_start - mi_col) * MI_SIZE;
3211         fullms_params.mv_limits.col_max =
3212             (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3213         // TODO(aconverse@google.com): Minimize the overlap between above and
3214         // left areas.
3215         fullms_params.mv_limits.row_min =
3216             (tile->mi_row_start - mi_row) * MI_SIZE;
3217         int bottom_coded_mi_edge =
3218             AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3219         fullms_params.mv_limits.row_max =
3220             (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3221         break;
3222       default: assert(0);
3223     }
3224     assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3225     assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3226     assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3227     assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3228 
3229     av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3230 
3231     if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3232         fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3233       continue;
3234     }
3235 
3236     const int step_param = cpi->mv_search_params.mv_step_param;
3237     IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3238     int_mv best_mv, best_hash_mv;
3239     FULLPEL_MV_STATS best_mv_stats;
3240 
3241     int bestsme =
3242         av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
3243                               &best_mv.as_fullmv, &best_mv_stats, NULL);
3244     const int hashsme = av1_intrabc_hash_search(
3245         cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3246     if (hashsme < bestsme) {
3247       best_mv = best_hash_mv;
3248       bestsme = hashsme;
3249     }
3250 
3251     if (bestsme == INT_MAX) continue;
3252     const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3253     if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3254                                 get_fullmv_from_mv(&dv)))
3255       continue;
3256     if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3257                          cm->seq_params->mib_size_log2))
3258       continue;
3259 
3260     // DV should not have sub-pel.
3261     assert((dv.col & 7) == 0);
3262     assert((dv.row & 7) == 0);
3263     memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3264     mbmi->filter_intra_mode_info.use_filter_intra = 0;
3265     mbmi->use_intrabc = 1;
3266     mbmi->mode = DC_PRED;
3267     mbmi->uv_mode = UV_DC_PRED;
3268     mbmi->motion_mode = SIMPLE_TRANSLATION;
3269     mbmi->mv[0].as_mv = dv;
3270     mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3271     mbmi->skip_txfm = 0;
3272     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3273                                   av1_num_planes(cm) - 1);
3274 
3275     // TODO(aconverse@google.com): The full motion field defining discount
3276     // in MV_COST_WEIGHT is too large. Explore other values.
3277     const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3278                                         dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3279     const int rate_mode = x->mode_costs.intrabc_cost[1];
3280     RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3281     if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3282                          &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3283       continue;
3284     rd_stats_yuv.rdcost =
3285         RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3286     if (rd_stats_yuv.rdcost < best_rd) {
3287       best_rd = rd_stats_yuv.rdcost;
3288       best_mbmi = *mbmi;
3289       best_rdstats = rd_stats_yuv;
3290       memcpy(best_blk_skip, txfm_info->blk_skip,
3291              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3292       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3293     }
3294   }
3295   *mbmi = best_mbmi;
3296   *rd_stats = best_rdstats;
3297   memcpy(txfm_info->blk_skip, best_blk_skip,
3298          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3299   av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3300 #if CONFIG_RD_DEBUG
3301   mbmi->rd_stats = *rd_stats;
3302 #endif
3303   return best_rd;
3304 }
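
// Illustrative sketch (not part of libaom): for the IBC_MOTION_ABOVE case in
// the function above, the displacement-vector limits keep the whole source
// block inside the tile horizontally and strictly above the current superblock
// row vertically. The hypothetical helper below restates the vertical upper
// bound in pixels: mib_size is the superblock size in MI units and MI_SIZE is
// the number of pixels per MI.
static inline int example_ibc_above_row_max(int sb_row, int mib_size,
                                            int mi_row, int block_height) {
  return (sb_row * mib_size - mi_row) * MI_SIZE - block_height;
}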
3305 
3306 // TODO(chiyotsai@google.com): We are using struct $struct_name instead of their
3307 // typedefs here because Doxygen doesn't know about the typedefs yet. So using
3308 // the typedefs would prevent doxygen from finding this function and generating
3309 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3310 // doxygen, we can revert to using the typedefs.
3311 void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3312                                struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3313                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3314   const AV1_COMMON *const cm = &cpi->common;
3315   MACROBLOCKD *const xd = &x->e_mbd;
3316   MB_MODE_INFO *const mbmi = xd->mi[0];
3317   const int num_planes = av1_num_planes(cm);
3318   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3319   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3320   uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
3321   int64_t dist_y = 0, dist_uv = 0;
3322 
3323   ctx->rd_stats.skip_txfm = 0;
3324   mbmi->ref_frame[0] = INTRA_FRAME;
3325   mbmi->ref_frame[1] = NONE_FRAME;
3326   mbmi->use_intrabc = 0;
3327   mbmi->mv[0].as_int = 0;
3328   mbmi->skip_mode = 0;
3329 
3330   const int64_t intra_yrd =
3331       av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3332                                  &y_skip_txfm, bsize, best_rd, ctx);
3333 
3334   // Initialize default mode evaluation params
3335   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3336 
3337   if (intra_yrd < best_rd) {
3338     // Search intra modes for uv planes if needed
3339     if (num_planes > 1) {
3340       // Set up the tx variables for reproducing the y predictions in case we
3341       // need it for chroma-from-luma.
3342       if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3343         memcpy(txfm_info->blk_skip, ctx->blk_skip,
3344                sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3345         av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3346       }
3347       const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3348       av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3349                                   &dist_uv, &uv_skip_txfm, bsize,
3350                                   max_uv_tx_size);
3351     }
3352 
3353     // Intra block is always coded as non-skip
3354     rd_cost->rate =
3355         rate_y + rate_uv +
3356         x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3357     rd_cost->dist = dist_y + dist_uv;
3358     rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3359     rd_cost->skip_txfm = 0;
3360   } else {
3361     rd_cost->rate = INT_MAX;
3362   }
3363 
3364   if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3365     best_rd = rd_cost->rdcost;
3366   if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3367     ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3368     memcpy(ctx->blk_skip, txfm_info->blk_skip,
3369            sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3370     assert(rd_cost->rate != INT_MAX);
3371   }
3372   if (rd_cost->rate == INT_MAX) return;
3373 
3374   ctx->mic = *xd->mi[0];
3375   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3376                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
3377   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3378 }
3379 
3380 static inline void calc_target_weighted_pred(
3381     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3382     const uint8_t *above, int above_stride, const uint8_t *left,
3383     int left_stride);
3384 
3385 static inline void rd_pick_skip_mode(
3386     RD_STATS *rd_cost, InterModeSearchState *search_state,
3387     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3388     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3389   const AV1_COMMON *const cm = &cpi->common;
3390   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3391   const int num_planes = av1_num_planes(cm);
3392   MACROBLOCKD *const xd = &x->e_mbd;
3393   MB_MODE_INFO *const mbmi = xd->mi[0];
3394 
3395   x->compound_idx = 1;  // COMPOUND_AVERAGE
3396   RD_STATS skip_mode_rd_stats;
3397   av1_invalid_rd_stats(&skip_mode_rd_stats);
3398 
3399   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3400       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3401     return;
3402   }
3403 
3404   const MV_REFERENCE_FRAME ref_frame =
3405       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3406   const MV_REFERENCE_FRAME second_ref_frame =
3407       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3408   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3409   const THR_MODES mode_index =
3410       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3411 
3412   if (mode_index == THR_INVALID) {
3413     return;
3414   }
3415 
3416   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3417        cpi->sf.inter_sf.disable_onesided_comp) &&
3418       cpi->all_one_sided_refs) {
3419     return;
3420   }
3421 
3422   mbmi->mode = this_mode;
3423   mbmi->uv_mode = UV_DC_PRED;
3424   mbmi->ref_frame[0] = ref_frame;
3425   mbmi->ref_frame[1] = second_ref_frame;
3426   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3427   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3428     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3429     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3430         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3431       return;
3432     }
3433     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3434                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3435                      mbmi_ext->mode_context);
3436     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3437     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3438     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3439   }
3440 
3441   assert(this_mode == NEAREST_NEARESTMV);
3442   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3443     return;
3444   }
3445 
3446   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3447   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3448   mbmi->comp_group_idx = 0;
3449   mbmi->compound_idx = x->compound_idx;
3450   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3451   mbmi->motion_mode = SIMPLE_TRANSLATION;
3452   mbmi->ref_mv_idx = 0;
3453   mbmi->skip_mode = mbmi->skip_txfm = 1;
3454   mbmi->palette_mode_info.palette_size[0] = 0;
3455   mbmi->palette_mode_info.palette_size[1] = 0;
3456 
3457   set_default_interp_filters(mbmi, cm->features.interp_filter);
3458 
3459   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3460   for (int i = 0; i < num_planes; i++) {
3461     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3462     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3463   }
3464 
3465   BUFFER_SET orig_dst;
3466   for (int i = 0; i < num_planes; i++) {
3467     orig_dst.plane[i] = xd->plane[i].dst.buf;
3468     orig_dst.stride[i] = xd->plane[i].dst.stride;
3469   }
3470 
3471   // Compare the use of skip_mode with the best intra/inter mode obtained.
3472   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3473   int64_t best_intra_inter_mode_cost = INT64_MAX;
3474   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3475     const ModeCosts *mode_costs = &x->mode_costs;
3476     best_intra_inter_mode_cost = RDCOST(
3477         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3478         rd_cost->dist);
3479     // Account for non-skip mode rate in total rd stats
3480     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3481     av1_rd_cost_update(x->rdmult, rd_cost);
3482   }
3483 
3484   // Obtain the rdcost for skip_mode.
3485   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3486                best_intra_inter_mode_cost);
3487 
3488   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3489       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3490     assert(mode_index != THR_INVALID);
3491     search_state->best_mbmode.skip_mode = 1;
3492     search_state->best_mbmode = *mbmi;
3493     memset(search_state->best_mbmode.inter_tx_size,
3494            search_state->best_mbmode.tx_size,
3495            sizeof(search_state->best_mbmode.inter_tx_size));
3496     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3497                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3498                   xd);
3499     search_state->best_mode_index = mode_index;
3500 
3501     // Update rd_cost
3502     rd_cost->rate = skip_mode_rd_stats.rate;
3503     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3504     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3505 
3506     search_state->best_rd = rd_cost->rdcost;
3507     search_state->best_skip2 = 1;
3508     search_state->best_mode_skippable = 1;
3509 
3510     x->txfm_search_info.skip_txfm = 1;
3511   }
3512 }
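
// Illustrative sketch (not part of libaom): skip_mode is adopted only when its
// rdcost is no worse than the best regular mode's rdcost after that mode's
// rate has been augmented with skip_mode_cost[ctx][0] (the cost of signaling
// that skip_mode is NOT used), and, in lossless segments, only when the
// skip-mode distortion is exactly zero. The hypothetical helper restates that
// acceptance test.
static inline int example_accept_skip_mode(int64_t skip_mode_rdcost,
                                           int64_t best_mode_rdcost_with_flag,
                                           int is_lossless,
                                           int64_t skip_mode_dist) {
  if (is_lossless && skip_mode_dist != 0) return 0;
  return skip_mode_rdcost <= best_mode_rdcost_with_flag;
}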
3513 
3514 // Get winner mode stats of given mode index
3515 static inline MB_MODE_INFO *get_winner_mode_stats(
3516     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3517     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3518     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3519     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3520     int mode_idx) {
3521   MB_MODE_INFO *winner_mbmi;
3522   if (multi_winner_mode_type) {
3523     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3524     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3525     winner_mbmi = &winner_mode_stat->mbmi;
3526 
3527     *winner_rd_cost = &winner_mode_stat->rd_cost;
3528     *winner_rate_y = winner_mode_stat->rate_y;
3529     *winner_rate_uv = winner_mode_stat->rate_uv;
3530     *winner_mode_index = winner_mode_stat->mode_index;
3531   } else {
3532     winner_mbmi = best_mbmode;
3533     *winner_rd_cost = best_rd_cost;
3534     *winner_rate_y = best_rate_y;
3535     *winner_rate_uv = best_rate_uv;
3536     *winner_mode_index = *best_mode_index;
3537   }
3538   return winner_mbmi;
3539 }
3540 
3541 // Speed feature: fast intra/inter transform type search.
3542 // Used for speed >= 2.
3543 // When this speed feature is on, only DCT is used during the rd mode search.
3544 // After the mode is determined, this function is called to select the
3545 // transform types and obtain an accurate rdcost.
3546 static inline void refine_winner_mode_tx(
3547     const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
3548     PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
3549     MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
3550     int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
3551   const AV1_COMMON *const cm = &cpi->common;
3552   MACROBLOCKD *const xd = &x->e_mbd;
3553   MB_MODE_INFO *const mbmi = xd->mi[0];
3554   TxfmSearchParams *txfm_params = &x->txfm_search_params;
3555   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3556   int64_t best_rd;
3557   const int num_planes = av1_num_planes(cm);
3558 
3559   if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
3560                                          rd_cost->skip_txfm))
3561     return;
3562 
3563   // Set params for winner mode evaluation
3564   set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
3565 
3566   // No best mode identified so far
3567   if (*best_mode_index == THR_INVALID) return;
3568 
3569   best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3570   for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
3571     RD_STATS *winner_rd_stats = NULL;
3572     int winner_rate_y = 0, winner_rate_uv = 0;
3573     THR_MODES winner_mode_index = 0;
3574 
3575     // TODO(any): Combine best mode and multi-winner mode processing paths
3576     // Get winner mode stats for current mode index
3577     MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
3578         x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
3579         &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
3580         cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);
3581 
3582     if (xd->lossless[winner_mbmi->segment_id] == 0 &&
3583         winner_mode_index != THR_INVALID &&
3584         is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
3585                                           rd_cost->skip_txfm)) {
3586       RD_STATS rd_stats = *winner_rd_stats;
3587       int skip_blk = 0;
3588       RD_STATS rd_stats_y, rd_stats_uv;
3589       const int skip_ctx = av1_get_skip_txfm_context(xd);
3590 
3591       *mbmi = *winner_mbmi;
3592 
3593       set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3594 
3595       // Select prediction reference frames.
3596       for (int i = 0; i < num_planes; i++) {
3597         xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3598         if (has_second_ref(mbmi))
3599           xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3600       }
3601 
3602       if (is_inter_mode(mbmi->mode)) {
3603         const int mi_row = xd->mi_row;
3604         const int mi_col = xd->mi_col;
3605         bool is_predictor_built = false;
3606         const PREDICTION_MODE prediction_mode = mbmi->mode;
3607         // Do interpolation filter search for realtime mode if applicable.
3608         if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
3609             cpi->oxcf.mode == REALTIME &&
3610             cm->current_frame.reference_mode == SINGLE_REFERENCE &&
3611             is_inter_mode(prediction_mode) &&
3612             mbmi->motion_mode == SIMPLE_TRANSLATION &&
3613             !is_inter_compound_mode(prediction_mode)) {
3614           is_predictor_built =
3615               fast_interp_search(cpi, x, mi_row, mi_col, bsize);
3616         }
3617         if (!is_predictor_built) {
3618           av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3619                                         av1_num_planes(cm) - 1);
3620         }
3621         if (mbmi->motion_mode == OBMC_CAUSAL)
3622           av1_build_obmc_inter_predictors_sb(cm, xd);
3623 
3624         av1_subtract_plane(x, bsize, 0);
3625         if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
3626             !xd->lossless[mbmi->segment_id]) {
3627           av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3628                                               INT64_MAX);
3629           assert(rd_stats_y.rate != INT_MAX);
3630         } else {
3631           av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3632                                             INT64_MAX);
3633           memset(mbmi->inter_tx_size, mbmi->tx_size,
3634                  sizeof(mbmi->inter_tx_size));
3635           for (int i = 0; i < xd->height * xd->width; ++i)
3636             set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
3637         }
3638       } else {
3639         av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
3640                                           INT64_MAX);
3641       }
3642 
3643       if (num_planes > 1) {
3644         av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
3645       } else {
3646         av1_init_rd_stats(&rd_stats_uv);
3647       }
3648 
3649       const ModeCosts *mode_costs = &x->mode_costs;
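      // Decide between coding the residual and signalling skip_txfm: for inter
      // modes, if the rd cost of the skip path (skip flag rate + sse as
      // distortion) is lower, treat the block as skipped.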
3650       if (is_inter_mode(mbmi->mode) &&
3651           RDCOST(x->rdmult,
3652                  mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
3653                      rd_stats_uv.rate,
3654                  (rd_stats_y.dist + rd_stats_uv.dist)) >
3655               RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
3656                      (rd_stats_y.sse + rd_stats_uv.sse))) {
3657         skip_blk = 1;
3658         rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
3659         rd_stats_uv.rate = 0;
3660         rd_stats_y.dist = rd_stats_y.sse;
3661         rd_stats_uv.dist = rd_stats_uv.sse;
3662       } else {
3663         skip_blk = 0;
3664         rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
3665       }
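      // Recompute the total rate by replacing the winner's original luma/chroma
      // rates with the refined transform search rates, and keep the refinement
      // only if it lowers the rd cost of the current best.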
3666       int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
3667                       winner_rate_y - winner_rate_uv;
3668       int64_t this_rd =
3669           RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
3670       if (best_rd > this_rd) {
3671         *best_mbmode = *mbmi;
3672         *best_mode_index = winner_mode_index;
3673         av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
3674         av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3675         rd_cost->rate = this_rate;
3676         rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
3677         rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
3678         rd_cost->rdcost = this_rd;
3679         best_rd = this_rd;
3680         *best_skip2 = skip_blk;
3681       }
3682     }
3683   }
3684 }
3685 
3686 /*!\cond */
3687 typedef struct {
3688   // Mask for each reference frame, specifying which prediction modes to NOT try
3689   // during search.
3690   uint32_t pred_modes[REF_FRAMES];
3691   // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3692   // reference frames (i, j).
3693   // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3694   // (NONE_FRAME).
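  // For example, ref_combo[LAST_FRAME][NONE_FRAME + 1] refers to single
  // prediction from LAST_FRAME, while ref_combo[LAST_FRAME][ALTREF_FRAME + 1]
  // refers to the {LAST_FRAME, ALTREF_FRAME} compound pair.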
3695   bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3696 } mode_skip_mask_t;
3697 /*!\endcond */
3698 
3699 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
3700 static inline void disable_reference(
3701     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3702   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3703     ref_combo[ref][ref2 + 1] = true;
3704   }
3705 }
3706 
3707 // Update 'ref_combo' mask to disable all inter references except ALTREF.
3708 static inline void disable_inter_references_except_altref(
3709     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3710   disable_reference(LAST_FRAME, ref_combo);
3711   disable_reference(LAST2_FRAME, ref_combo);
3712   disable_reference(LAST3_FRAME, ref_combo);
3713   disable_reference(GOLDEN_FRAME, ref_combo);
3714   disable_reference(BWDREF_FRAME, ref_combo);
3715   disable_reference(ALTREF2_FRAME, ref_combo);
3716 }
3717 
3718 static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
3719   { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
3720   { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
3721   { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
3722   { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
3723   { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
3724   { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
3725   { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
3726   { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
3727 };
3728 
3729 typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3730 
3731 static inline void default_skip_mask(mode_skip_mask_t *mask, REF_SET ref_set) {
3732   if (ref_set == REF_SET_FULL) {
3733     // Everything available by default.
3734     memset(mask, 0, sizeof(*mask));
3735   } else {
3736     // All modes available by default.
3737     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3738     // All references disabled first.
3739     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3740       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3741         mask->ref_combo[ref1][ref2 + 1] = true;
3742       }
3743     }
3744     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3745     int num_ref_combos;
3746 
3747     // Then enable reduced set of references explicitly.
3748     switch (ref_set) {
3749       case REF_SET_REDUCED:
3750         ref_set_combos = reduced_ref_combos;
3751         num_ref_combos =
3752             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3753         break;
3754       case REF_SET_REALTIME:
3755         ref_set_combos = real_time_ref_combos;
3756         num_ref_combos =
3757             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3758         break;
3759       default: assert(0); num_ref_combos = 0;
3760     }
3761 
3762     for (int i = 0; i < num_ref_combos; ++i) {
3763       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3764       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3765     }
3766   }
3767 }
3768 
3769 static inline void init_mode_skip_mask(mode_skip_mask_t *mask,
3770                                        const AV1_COMP *cpi, MACROBLOCK *x,
3771                                        BLOCK_SIZE bsize) {
3772   const AV1_COMMON *const cm = &cpi->common;
3773   const struct segmentation *const seg = &cm->seg;
3774   MACROBLOCKD *const xd = &x->e_mbd;
3775   MB_MODE_INFO *const mbmi = xd->mi[0];
3776   unsigned char segment_id = mbmi->segment_id;
3777   const SPEED_FEATURES *const sf = &cpi->sf;
3778   const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
3779   REF_SET ref_set = REF_SET_FULL;
3780 
3781   if (sf->rt_sf.use_real_time_ref_set)
3782     ref_set = REF_SET_REALTIME;
3783   else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
3784     ref_set = REF_SET_REDUCED;
3785 
3786   default_skip_mask(mask, ref_set);
3787 
3788   int min_pred_mv_sad = INT_MAX;
3789   MV_REFERENCE_FRAME ref_frame;
3790   if (ref_set == REF_SET_REALTIME) {
3791     // For real-time encoding, we only look at a subset of ref frames. So the
3792     // threshold for pruning should be computed from this subset as well.
3793     const int num_rt_refs =
3794         sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
3795     for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
3796       const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
3797       if (ref != INTRA_FRAME) {
3798         min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
3799       }
3800     }
3801   } else {
3802     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
3803       min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
3804   }
3805 
3806   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3807     if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
3808       // Skip checking missing reference in both single and compound reference
3809       // modes.
3810       disable_reference(ref_frame, mask->ref_combo);
3811     } else {
3812       // Skip fixed mv modes for poor references (pred_mv_sad > 4x the minimum)
3813       if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
3814         mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3815       }
3816     }
3817     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3818         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3819       // Reference not used for the segment.
3820       disable_reference(ref_frame, mask->ref_combo);
3821     }
3822   }
3823   // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
3824   // is disabled for this segment. This is to prevent the possibility that we
3825   // end up unable to pick any mode.
3826   if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3827     // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
3828     // unless ARNR filtering is enabled in which case we want
3829     // an unfiltered alternative. We allow near/nearest as well
3830     // because they may result in zero-zero MVs but be cheaper.
3831     if (cpi->rc.is_src_frame_alt_ref &&
3832         (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
3833       disable_inter_references_except_altref(mask->ref_combo);
3834 
3835       mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3836       const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
3837       int_mv near_mv, nearest_mv, global_mv;
3838       get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
3839                   &x->mbmi_ext);
3840       get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3841       get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
3842 
3843       if (near_mv.as_int != global_mv.as_int)
3844         mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
3845       if (nearest_mv.as_int != global_mv.as_int)
3846         mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
3847     }
3848   }
3849 
3850   if (cpi->rc.is_src_frame_alt_ref) {
3851     if (inter_sf->alt_ref_search_fp &&
3852         (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
3853       mask->pred_modes[ALTREF_FRAME] = 0;
3854       disable_inter_references_except_altref(mask->ref_combo);
3855       disable_reference(INTRA_FRAME, mask->ref_combo);
3856     }
3857   }
3858 
3859   if (inter_sf->alt_ref_search_fp) {
3860     if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
3861       int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
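      // sad_thresh allows 12.5% slack over the best pred_mv_sad among past
      // reference frames.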
3862       // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
3863       // those are past frames
3864       MV_REFERENCE_FRAME start_frame =
3865           inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
3866       for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
3867         if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
3868             0) {
3869           // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
3870           // to the relative dist of LAST_FRAME.
3871           if (inter_sf->alt_ref_search_fp == 1 &&
3872               (abs(cpi->ref_frame_dist_info
3873                        .ref_relative_dist[ref_frame - LAST_FRAME]) >
3874                1.5 * abs(cpi->ref_frame_dist_info
3875                              .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
3876             continue;
3877           }
3878           if (x->pred_mv_sad[ref_frame] > sad_thresh)
3879             mask->pred_modes[ref_frame] |= INTER_ALL;
3880         }
3881       }
3882     }
3883   }
3884 
3885   if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
3886     if (x->best_pred_mv_sad[0] < INT_MAX) {
3887       int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
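      // sad_thresh allows 50% slack over the best pred_mv_sad among past
      // reference frames.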
3888       const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
3889 
3890       // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
3891       for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
3892         ref_frame = prune_ref_list[ref_idx];
3893         if (x->pred_mv_sad[ref_frame] > sad_thresh)
3894           mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3895       }
3896     }
3897   }
3898 
3899   if (bsize > sf->part_sf.max_intra_bsize) {
3900     disable_reference(INTRA_FRAME, mask->ref_combo);
3901   }
3902 
3903   if (!cpi->oxcf.tool_cfg.enable_global_motion) {
3904     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3905       mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
3906       mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
3907     }
3908   }
3909 
3910   mask->pred_modes[INTRA_FRAME] |=
3911       ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3912 
3913   // Prune reference frames which are not the closest to the current
3914   // frame and which have a large pred_mv_sad.
3915   if (inter_sf->prune_single_ref) {
3916     assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
3917     const double prune_threshes[2] = { 1.20, 1.05 };
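    // Allow 20% (prune_single_ref == 1) or 5% (prune_single_ref == 2) slack
    // over the best pred_mv_sad in the same temporal direction before pruning
    // all single-reference inter modes for that reference frame.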
3918 
3919     for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3920       const RefFrameDistanceInfo *const ref_frame_dist_info =
3921           &cpi->ref_frame_dist_info;
3922       const int is_closest_ref =
3923           (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
3924           (ref_frame == ref_frame_dist_info->nearest_future_ref);
3925 
3926       if (!is_closest_ref) {
3927         const int dir =
3928             (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
3929                 ? 0
3930                 : 1;
3931         if (x->best_pred_mv_sad[dir] < INT_MAX &&
3932             x->pred_mv_sad[ref_frame] >
3933                 prune_threshes[inter_sf->prune_single_ref - 1] *
3934                     x->best_pred_mv_sad[dir])
3935           mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
3936       }
3937     }
3938   }
3939 }
3940 
3941 static inline void init_neighbor_pred_buf(const OBMCBuffer *const obmc_buffer,
3942                                           HandleInterModeArgs *const args,
3943                                           int is_hbd) {
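  // The OBMC scratch buffers hold three per-plane regions back to back:
  // offsets 0, MAX_SB_SQUARE / 2 and MAX_SB_SQUARE (scaled by sizeof(uint16_t)
  // for high bit depth).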
3944   if (is_hbd) {
3945     const int len = sizeof(uint16_t);
3946     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3947     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3948                                                  (MAX_SB_SQUARE >> 1) * len);
3949     args->above_pred_buf[2] =
3950         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3951     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3952     args->left_pred_buf[1] =
3953         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3954     args->left_pred_buf[2] =
3955         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3956   } else {
3957     args->above_pred_buf[0] = obmc_buffer->above_pred;
3958     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3959     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3960     args->left_pred_buf[0] = obmc_buffer->left_pred;
3961     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3962     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3963   }
3964 }
3965 
3966 static inline int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3967                                   MV_REFERENCE_FRAME ref_frame) {
3968   const AV1_COMMON *const cm = &cpi->common;
3969   MV_REFERENCE_FRAME rf[2];
3970   av1_set_ref_frame(rf, ref_frame);
3971 
3972   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3973 
3974   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3975                                        cm->cur_frame->ref_display_order_hint)) {
3976     return 1;
3977   }
3978 
3979   return 0;
3980 }
3981 
3982 static inline int is_ref_frame_used_by_compound_ref(int ref_frame,
3983                                                     int skip_ref_frame_mask) {
3984   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3985     if (!(skip_ref_frame_mask & (1 << r))) {
3986       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3987       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3988         return 1;
3989       }
3990     }
3991   }
3992   return 0;
3993 }
3994 
3995 static inline int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3996                                              const MB_MODE_INFO *mi_cache) {
3997   if (!mi_cache) {
3998     return 0;
3999   }
4000 
4001   if (ref_frame < REF_FRAMES) {
4002     return (ref_frame == mi_cache->ref_frame[0] ||
4003             ref_frame == mi_cache->ref_frame[1]);
4004   }
4005 
4006   // if we are here, then the current mode is compound.
4007   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4008   return ref_frame == cached_ref_type;
4009 }
4010 
4011 // Please add/modify parameter setting in this function, making it consistent
4012 // and easy to read and maintain.
4013 static inline void set_params_rd_pick_inter_mode(
4014     const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
4015     BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
4016     unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
4017     struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
4018   const AV1_COMMON *const cm = &cpi->common;
4019   MACROBLOCKD *const xd = &x->e_mbd;
4020   MB_MODE_INFO *const mbmi = xd->mi[0];
4021   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
4022   unsigned char segment_id = mbmi->segment_id;
4023 
4024   init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
4025   av1_collect_neighbors_ref_counts(xd);
4026   estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
4027                            ref_costs_comp);
4028 
4029   const int mi_row = xd->mi_row;
4030   const int mi_col = xd->mi_col;
4031   x->best_pred_mv_sad[0] = INT_MAX;
4032   x->best_pred_mv_sad[1] = INT_MAX;
4033 
4034   for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
4035        ++ref_frame) {
4036     x->pred_mv_sad[ref_frame] = INT_MAX;
4037     mbmi_ext->mode_context[ref_frame] = 0;
4038     mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4039     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
4040       // Skip the ref frame if the mask says skip and the ref is not used by
4041       // compound ref.
4042       if (skip_ref_frame_mask & (1 << ref_frame) &&
4043           !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
4044           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4045         continue;
4046       }
4047       assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
4048       setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
4049     }
4050     if (cpi->sf.inter_sf.alt_ref_search_fp ||
4051         cpi->sf.inter_sf.prune_single_ref ||
4052         cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
4053       // Store the best pred_mv_sad across all past frames
4054       if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
4055           0)
4056         x->best_pred_mv_sad[0] =
4057             AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
4058       else
4059         // Store the best pred_mv_sad across all future frames
4060         x->best_pred_mv_sad[1] =
4061             AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
4062     }
4063   }
4064 
4065   if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
4066     // No second reference on RT ref set, so no need to initialize
4067     for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
4068          ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
4069       mbmi_ext->mode_context[ref_frame] = 0;
4070       mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
4071       const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
4072       if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
4073             (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
4074         continue;
4075       }
4076 
4077       if (skip_ref_frame_mask & (1 << ref_frame) &&
4078           !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
4079         continue;
4080       }
4081       // Ref mv list population is not required when compound references are
4082       // pruned.
4083       if (prune_ref_frame(cpi, x, ref_frame)) continue;
4084 
4085       av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
4086                        xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
4087                        mbmi_ext->mode_context);
4088       // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
4089       // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
4090       av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
4091     }
4092   }
4093 
4094   av1_count_overlappable_neighbors(cm, xd);
4095   const FRAME_UPDATE_TYPE update_type =
4096       get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
4097   int use_actual_frame_probs = 1;
4098   int prune_obmc;
4099 #if CONFIG_FPMT_TEST
4100   use_actual_frame_probs =
4101       (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
4102   if (!use_actual_frame_probs) {
4103     prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
4104                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4105   }
4106 #endif
4107   if (use_actual_frame_probs) {
4108     prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
4109                  cpi->sf.inter_sf.prune_obmc_prob_thresh;
4110   }
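  // When OBMC is enabled and not pruned for this block size, pre-build the
  // above/left neighbour predictors and the target weighted prediction that
  // are used later by the OBMC motion mode search.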
4111   if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
4112     if (check_num_overlappable_neighbors(mbmi) &&
4113         is_motion_variation_allowed_bsize(bsize)) {
4114       int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4115       int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4116                                        MAX_SB_SIZE >> 1 };
4117       int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
4118                                         MAX_SB_SIZE >> 1 };
4119       int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
4120       av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
4121                                           dst_width1, dst_height1,
4122                                           args->above_pred_stride);
4123       av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
4124                                          dst_width2, dst_height2,
4125                                          args->left_pred_stride);
4126       const int num_planes = av1_num_planes(cm);
4127       av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
4128                            mi_col, 0, num_planes);
4129       calc_target_weighted_pred(
4130           cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
4131           args->left_pred_buf[0], args->left_pred_stride[0]);
4132     }
4133   }
4134 
4135   init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);
4136 
4137   // Set params for mode evaluation
4138   set_mode_eval_params(cpi, x, MODE_EVAL);
4139 
4140   x->comp_rd_stats_idx = 0;
4141 
4142   for (int idx = 0; idx < REF_FRAMES; idx++) {
4143     args->best_single_sse_in_refs[idx] = INT32_MAX;
4144   }
4145 }
4146 
4147 static inline void init_single_inter_mode_search_state(
4148     InterModeSearchState *search_state) {
4149   for (int dir = 0; dir < 2; ++dir) {
4150     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4151       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4152         SingleInterModeState *state;
4153 
4154         state = &search_state->single_state[dir][mode][ref_frame];
4155         state->ref_frame = NONE_FRAME;
4156         state->rd = INT64_MAX;
4157 
4158         state = &search_state->single_state_modelled[dir][mode][ref_frame];
4159         state->ref_frame = NONE_FRAME;
4160         state->rd = INT64_MAX;
4161 
4162         search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4163       }
4164     }
4165   }
4166 
4167   for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4168     search_state->best_single_rd[ref_frame] = INT64_MAX;
4169     search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4170   }
4171   av1_zero(search_state->single_state_cnt);
4172   av1_zero(search_state->single_state_modelled_cnt);
4173 }
4174 
4175 static inline void init_inter_mode_search_state(
4176     InterModeSearchState *search_state, const AV1_COMP *cpi,
4177     const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
4178   init_intra_mode_search_state(&search_state->intra_search_state);
4179   av1_invalid_rd_stats(&search_state->best_y_rdcost);
4180 
4181   search_state->best_rd = best_rd_so_far;
4182   search_state->best_skip_rd[0] = INT64_MAX;
4183   search_state->best_skip_rd[1] = INT64_MAX;
4184 
4185   av1_zero(search_state->best_mbmode);
4186 
4187   search_state->best_rate_y = INT_MAX;
4188 
4189   search_state->best_rate_uv = INT_MAX;
4190 
4191   search_state->best_mode_skippable = 0;
4192 
4193   search_state->best_skip2 = 0;
4194 
4195   search_state->best_mode_index = THR_INVALID;
4196 
4197   const MACROBLOCKD *const xd = &x->e_mbd;
4198   const MB_MODE_INFO *const mbmi = xd->mi[0];
4199   const unsigned char segment_id = mbmi->segment_id;
4200 
4201   search_state->num_available_refs = 0;
4202   memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
4203   memset(search_state->dist_order_refs, -1,
4204          sizeof(search_state->dist_order_refs));
4205 
4206   for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
4207     search_state->mode_threshold[i] = 0;
4208   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
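  // The remaining mode thresholds are the per-mode rd thresholds scaled by the
  // adaptive frequency factors (fixed point with RD_THRESH_FAC_FRAC_BITS
  // fractional bits); the first LAST_NEW_MV_INDEX + 1 thresholds stay zero.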
4209   for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
4210     search_state->mode_threshold[i] =
4211         ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4212         RD_THRESH_FAC_FRAC_BITS;
4213 
4214   search_state->best_intra_rd = INT64_MAX;
4215 
4216   search_state->best_pred_sse = UINT_MAX;
4217 
4218   av1_zero(search_state->single_newmv);
4219   av1_zero(search_state->single_newmv_rate);
4220   av1_zero(search_state->single_newmv_valid);
4221   for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
4222     for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4223       for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4224         search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4225         search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4226       }
4227     }
4228   }
4229 
4230   for (int i = 0; i < REFERENCE_MODES; ++i) {
4231     search_state->best_pred_rd[i] = INT64_MAX;
4232   }
4233 
4234   if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
4235     for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
4236       search_state->mode_threshold[i] =
4237           ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
4238           RD_THRESH_FAC_FRAC_BITS;
4239 
4240     for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
4241       for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
4242         for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4243           search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
4244           search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
4245         }
4246       }
4247     }
4248 
4249     init_single_inter_mode_search_state(search_state);
4250   }
4251 }
4252 
4253 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4254                            const MV_REFERENCE_FRAME *ref_frame,
4255                            const PREDICTION_MODE this_mode) {
4256   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4257     return true;
4258   }
4259 
4260   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4261 }
4262 
4263 static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
4264                                       BLOCK_SIZE bsize,
4265                                       PREDICTION_MODE curr_mode,
4266                                       const MV_REFERENCE_FRAME *ref_frames) {
4267   const int comp_pred = ref_frames[1] > INTRA_FRAME;
4268   if (comp_pred) {
4269     if (!is_comp_ref_allowed(bsize)) return 1;
4270     if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]])) {
4271       return 1;
4272     }
4273 
4274     const AV1_COMMON *const cm = &cpi->common;
4275     if (frame_is_intra_only(cm)) return 1;
4276 
4277     const CurrentFrame *const current_frame = &cm->current_frame;
4278     if (current_frame->reference_mode == SINGLE_REFERENCE) return 1;
4279 
4280     const struct segmentation *const seg = &cm->seg;
4281     const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
4282     // Do not allow compound prediction if the segment level reference frame
4283     // feature is in use as in this case there can only be one reference.
4284     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
4285   }
4286 
4287   if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
4288     // Mode must be compatible
4289     if (!is_interintra_allowed_bsize(bsize)) return 1;
4290     if (!is_interintra_allowed_mode(curr_mode)) return 1;
4291   }
4292 
4293   return 0;
4294 }
4295 
4296 static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
4297                                         BLOCK_SIZE bsize, int mib_size) {
4298   const int sb_size_mask = mib_size - 1;
4299   const MACROBLOCKD *const xd = &x->e_mbd;
4300   const int mi_row = xd->mi_row;
4301   const int mi_col = xd->mi_col;
4302   const int mi_row_in_sb = mi_row & sb_size_mask;
4303   const int mi_col_in_sb = mi_col & sb_size_mask;
4304   const int mi_w = mi_size_wide[bsize];
4305   const int mi_h = mi_size_high[bsize];
4306   int picked_ref_frames_mask = 0;
4307   for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_h; ++i) {
4308     for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_w; ++j) {
4309       picked_ref_frames_mask |= x->picked_ref_frames_mask[i * 32 + j];
4310     }
4311   }
4312   return picked_ref_frames_mask;
4313 }
4314 
4315 // Check if the reference frame pair of the current block matches that of the
4316 // given block.
4317 static inline int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4318                                        const MV_REFERENCE_FRAME *ref_frames) {
4319   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4320           (ref_frames[1] == mbmi->ref_frame[1]));
4321 }
4322 
4323 // Case 1: return 0, means don't skip this mode
4324 // Case 2: return 1, means skip this mode completely
4325 // Case 3: return 2, means skip only the motion mode search, but still evaluate the mode with simple translation
4326 static int inter_mode_search_order_independent_skip(
4327     const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
4328     InterModeSearchState *search_state, int skip_ref_frame_mask,
4329     PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
4330   if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
4331     return 1;
4332   }
4333 
4334   const int ref_type = av1_ref_frame_type(ref_frame);
4335   if (!cpi->sf.rt_sf.use_real_time_ref_set)
4336     if (prune_ref_frame(cpi, x, ref_type)) return 1;
4337 
4338   // This is only used in motion vector unit test.
4339   if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
4340       ref_frame[0] == INTRA_FRAME)
4341     return 1;
4342 
4343   const AV1_COMMON *const cm = &cpi->common;
4344   if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
4345     return 1;
4346   }
4347 
4348   // Reuse the prediction mode in cache
4349   if (x->use_mb_mode_cache) {
4350     const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
4351     const PREDICTION_MODE cached_mode = cached_mi->mode;
4352     const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
4353     const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
4354 
4355     // If the cached mode is intra, then we just need to match the mode.
4356     if (is_mode_intra(cached_mode) && mode != cached_mode) {
4357       return 1;
4358     }
4359 
4360     // If the cached mode is single inter mode, then we match the mode and
4361     // reference frame.
4362     if (cached_mode_is_single) {
4363       if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
4364         return 1;
4365       }
4366     } else {
4367       // If the cached mode is compound, then we need to consider several cases.
4368       const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
4369       if (mode_is_single) {
4370         // If the mode is single, we know the modes can't match. But we might
4371         // still want to search it if compound mode depends on the current mode.
4372         int skip_motion_mode_only = 0;
4373         if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
4374           skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
4375         } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
4376           skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
4377         } else if (cached_mode == NEW_NEWMV) {
4378           skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
4379                                    ref_frame[0] == cached_frame[1]);
4380         }
4381 
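        // If the cached compound mode reuses this single reference in a NEWMV
        // position, return 2 so the mode is still searched (only its motion
        // mode search is skipped); otherwise return 1 and skip it entirely.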
4382         return 1 + skip_motion_mode_only;
4383       } else {
4384         // If both modes are compound, then everything must match.
4385         if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
4386             ref_frame[1] != cached_frame[1]) {
4387           return 1;
4388         }
4389       }
4390     }
4391   }
4392 
4393   const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
4394   // If no valid mode has been found so far in PARTITION_NONE when finding a
4395   // valid partition is required, do not skip mode.
4396   if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
4397       x->must_find_valid_partition)
4398     return 0;
4399 
4400   const SPEED_FEATURES *const sf = &cpi->sf;
4401   // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
4402   // frames
4403   if (sf->inter_sf.prune_nearmv_using_neighbors &&
4404       (mode == NEAR_NEARMV || mode == NEARMV)) {
4405     const MACROBLOCKD *const xd = &x->e_mbd;
4406     if (search_state->best_rd != INT64_MAX && xd->left_available &&
4407         xd->up_available) {
4408       const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
4409                                                     { 1, 1, 0 },
4410                                                     { 2, 1, 0 } };
4411       const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;
4412 
4413       assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
4414              qindex_sub_range < 3);
4415       const int num_ref_frame_pair_match_thresh =
4416           thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
4417                     [qindex_sub_range];
4418 
4419       assert(num_ref_frame_pair_match_thresh <= 2 &&
4420              num_ref_frame_pair_match_thresh >= 0);
4421       int num_ref_frame_pair_match = 0;
4422 
4423       num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
4424       num_ref_frame_pair_match +=
4425           match_ref_frame_pair(xd->above_mbmi, ref_frame);
4426 
4427       // Pruning based on ref frame pair match with neighbors.
4428       if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
4429     }
4430   }
4431 
4432   int skip_motion_mode = 0;
4433   if (mbmi->partition != PARTITION_NONE) {
4434     int skip_ref = skip_ref_frame_mask & (1 << ref_type);
4435     if (ref_type <= ALTREF_FRAME && skip_ref) {
4436       // Since the compound ref modes depend on the motion estimation results of
4437       // two single ref modes (the best mv of each single ref mode is used as the
4438       // start point), if the current single ref mode is marked skip, we need to
4439       // check if it will be used in compound ref modes.
4440       if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
4441         // Found a non-skipped compound ref mode which contains the current
4442         // single ref, so this single ref can't be skipped completely.
4443         // Just skip its motion mode search; still try its simple
4444         // translation mode.
4445         skip_motion_mode = 1;
4446         skip_ref = 0;
4447       }
4448     }
4449     // If we are reusing the prediction from cache, and the current frame is
4450     // required by the cache, then we cannot prune it.
4451     if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
4452       skip_ref = 0;
4453       // If the cache only needs the current reference type for compound
4454       // prediction, then we can skip motion mode search.
4455       skip_motion_mode = (ref_type <= ALTREF_FRAME &&
4456                           x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
4457     }
4458     if (skip_ref) return 1;
4459   }
4460 
4461   if (ref_frame[0] == INTRA_FRAME) {
4462     if (mode != DC_PRED) {
4463       // Disable intra modes other than DC_PRED for blocks with low variance
4464       // Threshold for intra skipping based on source variance
4465       // TODO(debargha): Specialize the threshold for super block sizes
4466       const unsigned int skip_intra_var_thresh = 64;
4467       if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
4468           x->source_variance < skip_intra_var_thresh)
4469         return 1;
4470     }
4471   }
4472 
4473   if (skip_motion_mode) return 2;
4474 
4475   return 0;
4476 }
4477 
4478 static inline void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4479                              const MV_REFERENCE_FRAME *ref_frames,
4480                              const AV1_COMMON *cm) {
4481   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4482   mbmi->ref_mv_idx = 0;
4483   mbmi->mode = curr_mode;
4484   mbmi->uv_mode = UV_DC_PRED;
4485   mbmi->ref_frame[0] = ref_frames[0];
4486   mbmi->ref_frame[1] = ref_frames[1];
4487   pmi->palette_size[0] = 0;
4488   pmi->palette_size[1] = 0;
4489   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4490   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4491   mbmi->motion_mode = SIMPLE_TRANSLATION;
4492   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4493   set_default_interp_filters(mbmi, cm->features.interp_filter);
4494 }
4495 
4496 static inline void collect_single_states(MACROBLOCK *x,
4497                                          InterModeSearchState *search_state,
4498                                          const MB_MODE_INFO *const mbmi) {
4499   int i, j;
4500   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4501   const PREDICTION_MODE this_mode = mbmi->mode;
4502   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4503   const int mode_offset = INTER_OFFSET(this_mode);
4504   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4505 
4506   // Simple rd
4507   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4508   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4509     const int64_t rd =
4510         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4511     if (rd < simple_rd) simple_rd = rd;
4512   }
4513 
4514   // Insertion sort of single_state
4515   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4516   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4517   i = search_state->single_state_cnt[dir][mode_offset];
4518   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4519     state_s[j] = state_s[j - 1];
4520   state_s[j] = this_state_s;
4521   search_state->single_state_cnt[dir][mode_offset]++;
4522 
4523   // Modelled rd
4524   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4525   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4526     const int64_t rd =
4527         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4528     if (rd < modelled_rd) modelled_rd = rd;
4529   }
4530 
4531   // Insertion sort of single_state_modelled
4532   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4533   SingleInterModeState *state_m =
4534       search_state->single_state_modelled[dir][mode_offset];
4535   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4536   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4537     state_m[j] = state_m[j - 1];
4538   state_m[j] = this_state_m;
4539   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4540 }
4541 
4542 static inline void analyze_single_states(const AV1_COMP *cpi,
4543                                          InterModeSearchState *search_state) {
4544   const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
4545   assert(prune_level >= 1);
4546   int i, j, dir, mode;
4547 
4548   for (dir = 0; dir < 2; ++dir) {
4549     int64_t best_rd;
4550     SingleInterModeState(*state)[FWD_REFS];
4551     const int prune_factor = prune_level >= 2 ? 6 : 5;
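    // A reference is invalidated when its rd exceeds best_rd * 8 / prune_factor,
    // i.e. 1.6x the best at prune level 1 and ~1.33x at level >= 2.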
4552 
4553     // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
4554     // reference frames for all the modes (NEARESTMV and NEARMV may not
4555     // have the same motion vectors). Always keep the best of each mode
4556     // because it might form the best possible combination with another mode.
4557     state = search_state->single_state[dir];
4558     best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4559                      state[INTER_OFFSET(GLOBALMV)][0].rd);
4560     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4561       for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
4562         if (state[mode][i].rd != INT64_MAX &&
4563             (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4564           state[mode][i].valid = 0;
4565         }
4566       }
4567     }
4568 
4569     state = search_state->single_state_modelled[dir];
4570     best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
4571                      state[INTER_OFFSET(GLOBALMV)][0].rd);
4572     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4573       for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
4574         if (state[mode][i].rd != INT64_MAX &&
4575             (state[mode][i].rd >> 3) * prune_factor > best_rd) {
4576           state[mode][i].valid = 0;
4577         }
4578       }
4579     }
4580   }
4581 
4582   // Ordering by simple rd first, then by modelled rd
4583   for (dir = 0; dir < 2; ++dir) {
4584     for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4585       const int state_cnt_s = search_state->single_state_cnt[dir][mode];
4586       const int state_cnt_m =
4587           search_state->single_state_modelled_cnt[dir][mode];
4588       SingleInterModeState *state_s = search_state->single_state[dir][mode];
4589       SingleInterModeState *state_m =
4590           search_state->single_state_modelled[dir][mode];
4591       int count = 0;
4592       const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
4593       for (i = 0; i < state_cnt_s; ++i) {
4594         if (state_s[i].rd == INT64_MAX) break;
4595         if (state_s[i].valid) {
4596           search_state->single_rd_order[dir][mode][count++] =
4597               state_s[i].ref_frame;
4598         }
4599       }
4600       if (count >= max_candidates) continue;
4601 
4602       for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
4603         if (state_m[i].rd == INT64_MAX) break;
4604         if (!state_m[i].valid) continue;
4605         const int ref_frame = state_m[i].ref_frame;
4606         int match = 0;
4607         // Check if existing already
4608         for (j = 0; j < count; ++j) {
4609           if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
4610             match = 1;
4611             break;
4612           }
4613         }
4614         if (match) continue;
4615         // Check if this ref_frame is removed in simple rd
4616         int valid = 1;
4617         for (j = 0; j < state_cnt_s; ++j) {
4618           if (ref_frame == state_s[j].ref_frame) {
4619             valid = state_s[j].valid;
4620             break;
4621           }
4622         }
4623         if (valid) {
4624           search_state->single_rd_order[dir][mode][count++] = ref_frame;
4625         }
4626       }
4627     }
4628   }
4629 }
4630 
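// Returns how many of the best-ranked single-mode reference frames a compound
// reference is checked against; higher prune_comp_search_by_single_result
// levels allow fewer candidates and hence prune more aggressively.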
4631 static int compound_skip_get_candidates(
4632     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4633     const int dir, const PREDICTION_MODE mode) {
4634   const int mode_offset = INTER_OFFSET(mode);
4635   const SingleInterModeState *state =
4636       search_state->single_state[dir][mode_offset];
4637   const SingleInterModeState *state_modelled =
4638       search_state->single_state_modelled[dir][mode_offset];
4639 
4640   int max_candidates = 0;
4641   for (int i = 0; i < FWD_REFS; ++i) {
4642     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4643     max_candidates++;
4644   }
4645 
4646   int candidates = max_candidates;
4647   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4648     candidates = AOMMIN(2, max_candidates);
4649   }
4650   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4651     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4652         state[0].ref_frame == state_modelled[0].ref_frame)
4653       candidates = 1;
4654     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4655   }
4656 
4657   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4658     // Limit the number of candidates to 1 in each direction for compound
4659     // prediction
4660     candidates = AOMMIN(1, candidates);
4661   }
4662   return candidates;
4663 }
4664 
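// Returns 1 if the compound mode should be pruned based on the single-mode
// results: for each of its references that was searched with matching motion
// vectors, the reference must rank among the best single-mode candidates.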
4665 static int compound_skip_by_single_states(
4666     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4667     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
4668     const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
4669   const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
4670   const int mode[2] = { compound_ref0_mode(this_mode),
4671                         compound_ref1_mode(this_mode) };
4672   const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
4673   const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
4674                             refs[1] <= GOLDEN_FRAME ? 0 : 1 };
4675   int ref_searched[2] = { 0, 0 };
4676   int ref_mv_match[2] = { 1, 1 };
4677   int i, j;
4678 
4679   for (i = 0; i < 2; ++i) {
4680     const SingleInterModeState *state =
4681         search_state->single_state[mode_dir[i]][mode_offset[i]];
4682     const int state_cnt =
4683         search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
4684     for (j = 0; j < state_cnt; ++j) {
4685       if (state[j].ref_frame == refs[i]) {
4686         ref_searched[i] = 1;
4687         break;
4688       }
4689     }
4690   }
4691 
4692   const int ref_set = get_drl_refmv_count(x, refs, this_mode);
4693   for (i = 0; i < 2; ++i) {
4694     if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
4695       continue;
4696     }
4697     const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
4698     for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
4699       int_mv single_mv;
4700       int_mv comp_mv;
4701       get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
4702                   &x->mbmi_ext);
4703       get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
4704       if (single_mv.as_int != comp_mv.as_int) {
4705         ref_mv_match[i] = 0;
4706         break;
4707       }
4708     }
4709   }
4710 
4711   for (i = 0; i < 2; ++i) {
4712     if (!ref_searched[i] || !ref_mv_match[i]) continue;
4713     const int candidates =
4714         compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
4715     const MV_REFERENCE_FRAME *ref_order =
4716         search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
4717     int match = 0;
4718     for (j = 0; j < candidates; ++j) {
4719       if (refs[i] == ref_order[j]) {
4720         match = 1;
4721         break;
4722       }
4723     }
4724     if (!match) return 1;
4725   }
4726 
4727   return 0;
4728 }
4729 
4730 // Check if the ref frames of the current block match those of the given block.
4731 static inline void match_ref_frame(const MB_MODE_INFO *const mbmi,
4732                                    const MV_REFERENCE_FRAME *ref_frames,
4733                                    int *const is_ref_match) {
4734   if (is_inter_block(mbmi)) {
4735     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4736     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4737     if (has_second_ref(mbmi)) {
4738       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4739       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4740     }
4741   }
4742 }
4743 
4744 // Prune compound mode using ref frames of neighbor blocks.
4745 static inline int compound_skip_using_neighbor_refs(
4746     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4747     const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4748   // Exclude non-extended compound modes from pruning
4749   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4750       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4751     return 0;
4752 
4753   if (prune_ext_comp_using_neighbors >= 3) return 1;
4754 
4755   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4756                                 // 1 - match for backward refs
4757   // Check if ref frames of this block matches with left neighbor.
4758   if (xd->left_available)
4759     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4760 
4761   // Check if ref frames of this block matches with above neighbor.
4762   if (xd->up_available)
4763     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4764 
4765   // Combine ref frame match with neighbors in forward and backward refs.
4766   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4767 
4768   // Pruning based on ref frame match with neighbors.
4769   if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4770   return 1;
4771 }
4772 
4773 // Update best single mode for the given reference frame based on simple rd.
4774 static inline void update_best_single_mode(InterModeSearchState *search_state,
4775                                            const PREDICTION_MODE this_mode,
4776                                            const MV_REFERENCE_FRAME ref_frame,
4777                                            int64_t this_rd) {
4778   if (this_rd < search_state->best_single_rd[ref_frame]) {
4779     search_state->best_single_rd[ref_frame] = this_rd;
4780     search_state->best_single_mode[ref_frame] = this_mode;
4781   }
4782 }
4783 
4784 // Prune compound mode using best single mode for the same reference.
4785 static inline int skip_compound_using_best_single_mode_ref(
4786     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4787     const PREDICTION_MODE *best_single_mode,
4788     int prune_comp_using_best_single_mode_ref) {
4789   // Exclude non-extended compound modes from pruning
4790   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4791       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4792     return 0;
4793 
4794   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4795   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4796   // Get ref frame direction corresponding to NEWMV
4797   // 0 - NEWMV corresponding to forward direction
4798   // 1 - NEWMV corresponding to backward direction
4799   const int newmv_dir = comp_mode_ref0 != NEWMV;
4800 
4801   // Avoid pruning the compound mode when the ref frame corresponding to NEWMV
4802   // has NEWMV as the single mode winner.
4803   // Example: For an extended-compound mode,
4804   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4805   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4806   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4807   //   ALTREF_FRAME is NEWMV
4808   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4809   if (single_mode == NEWMV) return 0;
4810 
4811   // Avoid pruning the compound mode when the best single mode is not available
4812   if (prune_comp_using_best_single_mode_ref == 1)
4813     if (single_mode == MB_MODE_COUNT) return 0;
4814   return 1;
4815 }
4816 
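// qsort comparator for int64_t values. Explicit comparisons are used instead
// of returning (a64 - b64), since the difference could overflow or be
// truncated when converted to int, yielding the wrong sign.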
4817 static int compare_int64(const void *a, const void *b) {
4818   int64_t a64 = *((int64_t *)a);
4819   int64_t b64 = *((int64_t *)b);
4820   if (a64 < b64) {
4821     return -1;
4822   } else if (a64 == b64) {
4823     return 0;
4824   } else {
4825     return 1;
4826   }
4827 }
4828 
4829 static inline void update_search_state(
4830     InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
4831     PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
4832     const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
4833     THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
4834   const MACROBLOCKD *xd = &x->e_mbd;
4835   const MB_MODE_INFO *mbmi = xd->mi[0];
4836   const int skip_ctx = av1_get_skip_txfm_context(xd);
4837   const int skip_txfm =
4838       mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
4839   const TxfmSearchInfo *txfm_info = &x->txfm_search_info;
4840 
4841   search_state->best_rd = new_best_rd_stats->rdcost;
4842   search_state->best_mode_index = new_best_mode;
4843   *best_rd_stats_dst = *new_best_rd_stats;
4844   search_state->best_mbmode = *mbmi;
4845   search_state->best_skip2 = skip_txfm;
4846   search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
4847   // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
4848   // rate_uv because the av1_txfm_search process is replaced by rd estimation.
4849   // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
4850   // These two values will be updated when av1_txfm_search is called.
4851   if (txfm_search_done) {
4852     search_state->best_rate_y =
4853         new_best_rd_stats_y->rate +
4854         x->mode_costs.skip_txfm_cost[skip_ctx]
4855                                     [new_best_rd_stats->skip_txfm || skip_txfm];
4856     search_state->best_rate_uv = new_best_rd_stats_uv->rate;
4857   }
4858   search_state->best_y_rdcost = *new_best_rd_stats_y;
4859   memcpy(ctx->blk_skip, txfm_info->blk_skip,
4860          sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
4861   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
4862 }
4863 
4864 // Find the best RD for a reference frame (among single reference modes)
4865 // and store +10% of it in the 0-th element in ref_frame_rd.
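// Example: if the best single-reference RD among elements 1..REF_FRAMES-1 is
// 1000, the stored cut-off becomes 1100, and in_single_ref_cutoff() below
// keeps a compound mode only if at least one of its refs has RD <= 1100.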
4866 static inline void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4867   assert(ref_frame_rd[0] == INT64_MAX);
4868   int64_t ref_copy[REF_FRAMES - 1];
4869   memcpy(ref_copy, ref_frame_rd + 1,
4870          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4871   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4872 
4873   int64_t cutoff = ref_copy[0];
4874   // Set the cut-off at 10% above the best RD.
4875   if (cutoff != INT64_MAX) {
4876     assert(cutoff < INT64_MAX / 200);
4877     cutoff = (110 * cutoff) / 100;
4878   }
4879   ref_frame_rd[0] = cutoff;
4880 }
4881 
4882 // Check if either frame is within the cutoff.
4883 static inline bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4884                                         MV_REFERENCE_FRAME frame1,
4885                                         MV_REFERENCE_FRAME frame2) {
4886   assert(frame2 > 0);
4887   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4888          ref_frame_rd[frame2] <= ref_frame_rd[0];
4889 }
4890 
4891 static inline void evaluate_motion_mode_for_winner_candidates(
4892     const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
4893     HandleInterModeArgs *const args, TileDataEnc *const tile_data,
4894     PICK_MODE_CONTEXT *const ctx,
4895     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
4896     const motion_mode_best_st_candidate *const best_motion_mode_cands,
4897     int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
4898     InterModeSearchState *const search_state, int64_t *yrd) {
4899   const AV1_COMMON *const cm = &cpi->common;
4900   const int num_planes = av1_num_planes(cm);
4901   MACROBLOCKD *const xd = &x->e_mbd;
4902   MB_MODE_INFO *const mbmi = xd->mi[0];
4903   InterModesInfo *const inter_modes_info = x->inter_modes_info;
4904   const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
4905 
4906   for (int cand = 0; cand < num_best_cand; cand++) {
4907     RD_STATS rd_stats;
4908     RD_STATS rd_stats_y;
4909     RD_STATS rd_stats_uv;
4910     av1_init_rd_stats(&rd_stats);
4911     av1_init_rd_stats(&rd_stats_y);
4912     av1_init_rd_stats(&rd_stats_uv);
4913     int rate_mv;
4914 
4915     rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
4916     args->skip_motion_mode =
4917         best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
4918     *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
4919     rd_stats.rate =
4920         best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
4921 
4922     // Skip this candidate if it uses compound prediction.
4923     if (!is_inter_singleref_mode(mbmi->mode)) continue;
4924 
4925     x->txfm_search_info.skip_txfm = 0;
4926     struct macroblockd_plane *pd = xd->plane;
4927     const BUFFER_SET orig_dst = {
4928       { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
4929       { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
4930     };
4931 
4932     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4933     // Initialize motion mode to simple translation
4934     // Calculation of switchable rate depends on it.
4935     mbmi->motion_mode = 0;
4936     const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
4937     for (int i = 0; i < num_planes; i++) {
4938       xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
4939       if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
4940     }
4941 
4942     int64_t skip_rd[2] = { search_state->best_skip_rd[0],
4943                            search_state->best_skip_rd[1] };
4944     int64_t this_yrd = INT64_MAX;
4945     int64_t ret_value = motion_mode_rd(
4946         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
4947         search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
4948         do_tx_search, inter_modes_info, 1, &this_yrd);
4949 
4950     if (ret_value != INT64_MAX) {
4951       rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
4952       const THR_MODES mode_enum = get_prediction_mode_idx(
4953           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4954       // Collect mode stats for multiwinner mode processing
4955       store_winner_mode_stats(
4956           &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
4957           mode_enum, NULL, bsize, rd_stats.rdcost,
4958           cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
4959       if (rd_stats.rdcost < search_state->best_rd) {
4960         *yrd = this_yrd;
4961         update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
4962                             &rd_stats_uv, mode_enum, x, do_tx_search);
4963         if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
4964       }
4965     }
4966   }
4967 }
4968 
4969 /*!\cond */
4970 // Arguments for speed feature pruning of inter mode search
4971 typedef struct {
4972   int *skip_motion_mode;
4973   mode_skip_mask_t *mode_skip_mask;
4974   InterModeSearchState *search_state;
4975   int skip_ref_frame_mask;
4976   int reach_first_comp_mode;
4977   int mode_thresh_mul_fact;
4978   int num_single_modes_processed;
4979   int prune_cpd_using_sr_stats_ready;
4980 } InterModeSFArgs;
4981 /*!\endcond */
4982 
4983 static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
4984                            int64_t *ref_frame_rd, int midx,
4985                            InterModeSFArgs *args, int is_low_temp_var) {
4986   const SPEED_FEATURES *const sf = &cpi->sf;
4987   MACROBLOCKD *const xd = &x->e_mbd;
4988   // Get the actual prediction mode we are trying in this iteration
4989   const THR_MODES mode_enum = av1_default_mode_order[midx];
4990   const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
4991   const PREDICTION_MODE this_mode = mode_def->mode;
4992   const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
4993   const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
4994   const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
4995   const int comp_pred = second_ref_frame > INTRA_FRAME;
4996 
4997   if (ref_frame == INTRA_FRAME) return 1;
4998 
4999   const FRAME_UPDATE_TYPE update_type =
5000       get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
5001   if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
5002       comp_pred) {
5003     return 1;
5004   }
5005 
5006   // This is for real time encoding.
5007   if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
5008       this_mode != NEARESTMV)
5009     return 1;
5010 
5011   // Check if this mode should be skipped because it is incompatible with the
5012   // current frame
5013   if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
5014     return 1;
5015   const int ret = inter_mode_search_order_independent_skip(
5016       cpi, x, args->mode_skip_mask, args->search_state,
5017       args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
5018   if (ret == 1) return 1;
5019   *(args->skip_motion_mode) = (ret == 2);
5020 
5021   // We've reached the first compound prediction mode, get stats from the
5022   // single reference predictors to help with pruning.
5023   // Disable this pruning logic if interpolation filter search was skipped for
5024   // single prediction modes as it can result in aggressive pruning of compound
5025   // prediction modes due to the absence of modelled_rd populated by
5026   // av1_interpolation_filter_search().
5027   // TODO(Remya): Check the impact of the sf
5028   // 'prune_comp_search_by_single_result' if compound prediction modes are
5029   // enabled in future for REALTIME encode.
5030   if (!sf->interp_sf.skip_interp_filter_search &&
5031       sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
5032       args->reach_first_comp_mode == 0) {
5033     analyze_single_states(cpi, args->search_state);
5034     args->reach_first_comp_mode = 1;
5035   }
5036 
5037   // Prune aggressively when best mode is skippable.
5038   int mul_fact = args->search_state->best_mode_skippable
5039                      ? args->mode_thresh_mul_fact
5040                      : (1 << MODE_THRESH_QBITS);
5041   int64_t mode_threshold =
5042       (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
5043       MODE_THRESH_QBITS;
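  // mul_fact is a Q12 scale factor (1 << MODE_THRESH_QBITS corresponds to a
  // multiplier of 1.0), so the right shift converts the scaled threshold back
  // to an integer RD value.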
5044 
5045   if (args->search_state->best_rd < mode_threshold) return 1;
5046 
5047   // Skip this compound mode based on the RD results from the single prediction
5048   // modes
5049   if (!sf->interp_sf.skip_interp_filter_search &&
5050       sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
5051     if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
5052                                        ref_frame, second_ref_frame, x))
5053       return 1;
5054   }
5055 
5056   if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
5057     // After we are done with single reference modes, find the 2nd best RD
5058     // for a reference frame. Only search compound modes that have a reference
5059     // frame at least as good as the 2nd best.
5060     if (!args->prune_cpd_using_sr_stats_ready &&
5061         args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
5062       find_top_ref(ref_frame_rd);
5063       args->prune_cpd_using_sr_stats_ready = 1;
5064     }
5065     if (args->prune_cpd_using_sr_stats_ready &&
5066         !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
5067       return 1;
5068   }
5069 
5070   // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
5071   if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
5072       (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
5073     return 1;
5074   }
5075 
5076   if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
5077     if (compound_skip_using_neighbor_refs(
5078             xd, this_mode, ref_frames,
5079             sf->inter_sf.prune_ext_comp_using_neighbors))
5080       return 1;
5081   }
5082 
5083   if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
5084     if (skip_compound_using_best_single_mode_ref(
5085             this_mode, ref_frames, args->search_state->best_single_mode,
5086             sf->inter_sf.prune_comp_using_best_single_mode_ref))
5087       return 1;
5088   }
5089 
5090   if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
5091     const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
5092     if (skip_nearest_near_mv_using_refmv_weight(
5093             x, this_mode, ref_frame_type,
5094             args->search_state->best_mbmode.mode)) {
5095       // Ensure the mode is pruned only when the current block has obtained a
5096       // valid inter mode.
5097       assert(is_inter_mode(args->search_state->best_mbmode.mode));
5098       return 1;
5099     }
5100   }
5101 
5102   if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
5103       ref_frame == GOLDEN_FRAME && !comp_pred) {
5104     const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
5105     if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
5106         args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
5107       if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
5108         return 1;
5109     }
5110   }
5111 
5112   return 0;
5113 }
5114 
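// When the frame uses REFERENCE_MODE_SELECT, rd_stats->rate already includes
// compmode_cost, so it is subtracted to obtain the single-reference rate;
// otherwise compmode_cost is added to obtain the hybrid rate.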
5115 static void record_best_compound(REFERENCE_MODE reference_mode,
5116                                  RD_STATS *rd_stats, int comp_pred, int rdmult,
5117                                  InterModeSearchState *search_state,
5118                                  int compmode_cost) {
5119   int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
5120 
5121   if (reference_mode == REFERENCE_MODE_SELECT) {
5122     single_rate = rd_stats->rate - compmode_cost;
5123     hybrid_rate = rd_stats->rate;
5124   } else {
5125     single_rate = rd_stats->rate;
5126     hybrid_rate = rd_stats->rate + compmode_cost;
5127   }
5128 
5129   single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
5130   hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);
5131 
5132   if (!comp_pred) {
5133     if (single_rd < search_state->best_pred_rd[SINGLE_REFERENCE])
5134       search_state->best_pred_rd[SINGLE_REFERENCE] = single_rd;
5135   } else {
5136     if (single_rd < search_state->best_pred_rd[COMPOUND_REFERENCE])
5137       search_state->best_pred_rd[COMPOUND_REFERENCE] = single_rd;
5138   }
5139   if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
5140     search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
5141 }
5142 
5143 // Does a transform search over a list of the best inter mode candidates.
5144 // This is called if the original mode search computed an RD estimate
5145 // for the transform search rather than doing a full search.
5146 static void tx_search_best_inter_candidates(
5147     AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
5148     int64_t best_rd_so_far, BLOCK_SIZE bsize,
5149     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
5150     InterModeSearchState *search_state, RD_STATS *rd_cost,
5151     PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
5152   AV1_COMMON *const cm = &cpi->common;
5153   MACROBLOCKD *const xd = &x->e_mbd;
5154   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5155   const ModeCosts *mode_costs = &x->mode_costs;
5156   const int num_planes = av1_num_planes(cm);
5157   const int skip_ctx = av1_get_skip_txfm_context(xd);
5158   MB_MODE_INFO *const mbmi = xd->mi[0];
5159   InterModesInfo *inter_modes_info = x->inter_modes_info;
5160   inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
5161   search_state->best_rd = best_rd_so_far;
5162   search_state->best_mode_index = THR_INVALID;
5163   // Initialize best mode stats for winner mode processing
5164   x->winner_mode_count = 0;
5165   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5166                           NULL, bsize, best_rd_so_far,
5167                           cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5168   inter_modes_info->num =
5169       inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
5170           ? inter_modes_info->num
5171           : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
5172   const int64_t top_est_rd =
5173       inter_modes_info->num > 0
5174           ? inter_modes_info
5175                 ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
5176           : INT64_MAX;
5177   *yrd = INT64_MAX;
5178   int64_t best_rd_in_this_partition = INT64_MAX;
5179   int num_inter_mode_cands = inter_modes_info->num;
5180   int newmv_mode_evaled = 0;
5181   int max_allowed_cands = INT_MAX;
5182   if (cpi->sf.inter_sf.limit_inter_mode_cands) {
5183     // The bound on the no. of inter mode candidates: once a newmv mode has
5184     // been evaluated, the number of candidates is limited to
5185     // max_allowed_cands + 1.
5186     const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
5187     assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
5188     max_allowed_cands =
5189         num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
5190   }
5191 
5192   int num_mode_thresh = INT_MAX;
5193   if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
5194     // Bound the number of transform searches per prediction mode once the
5195     // overall count exceeds a threshold.
5196     const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
5197     assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
5198     num_mode_thresh =
5199         num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
5200   }
5201 
5202   int num_tx_cands = 0;
5203   int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
5204   // Iterate over best inter mode candidates and perform tx search
5205   for (int j = 0; j < num_inter_mode_cands; ++j) {
5206     const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
5207     *mbmi = inter_modes_info->mbmi_arr[data_idx];
5208     const PREDICTION_MODE prediction_mode = mbmi->mode;
5209     int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
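    // Stop once the estimated RD is more than 25% worse than the best estimate
    // (curr_est_rd * 0.80 > top_est_rd is equivalent to
    // curr_est_rd > 1.25 * top_est_rd).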
5210     if (curr_est_rd * 0.80 > top_est_rd) break;
5211 
5212     if (num_tx_cands > num_mode_thresh) {
5213       if ((prediction_mode != NEARESTMV &&
5214            num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
5215           (prediction_mode == NEARESTMV &&
5216            num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
5217         continue;
5218     }
5219 
5220     txfm_info->skip_txfm = 0;
5221     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5222 
5223     // Select prediction reference frames.
5224     const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
5225     for (int i = 0; i < num_planes; i++) {
5226       xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
5227       if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
5228     }
5229 
5230     bool is_predictor_built = false;
5231 
5232     // Initialize RD stats
5233     RD_STATS rd_stats;
5234     RD_STATS rd_stats_y;
5235     RD_STATS rd_stats_uv;
5236     const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
5237     int64_t skip_rd = INT64_MAX;
5238     const int txfm_rd_gate_level = get_txfm_rd_gate_level(
5239         cm->seq_params->enable_masked_compound,
5240         cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
5241         /*eval_motion_mode=*/0);
5242     if (txfm_rd_gate_level) {
5243       // Check if the mode is good enough based on skip RD
5244       int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
5245       skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
5246       int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
5247                                       skip_rd, txfm_rd_gate_level, 0);
5248       if (!eval_txfm) continue;
5249     }
5250 
5251     // Build the prediction for this mode
5252     if (!is_predictor_built) {
5253       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
5254                                     av1_num_planes(cm) - 1);
5255     }
5256     if (mbmi->motion_mode == OBMC_CAUSAL) {
5257       av1_build_obmc_inter_predictors_sb(cm, xd);
5258     }
5259 
5260     num_tx_cands++;
5261     if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
5262     num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
5263     int64_t this_yrd = INT64_MAX;
5264     // Do the transform search
5265     if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
5266                          mode_rate, search_state->best_rd)) {
5267       continue;
5268     } else {
5269       const int y_rate =
5270           rd_stats.skip_txfm
5271               ? mode_costs->skip_txfm_cost[skip_ctx][1]
5272               : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
5273       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);
5274 
5275       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
5276         inter_mode_data_push(
5277             tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
5278             rd_stats_y.rate + rd_stats_uv.rate +
5279                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
5280       }
5281     }
5282     rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
5283     if (rd_stats.rdcost < best_rd_in_this_partition) {
5284       best_rd_in_this_partition = rd_stats.rdcost;
5285       *yrd = this_yrd;
5286     }
5287 
5288     const THR_MODES mode_enum = get_prediction_mode_idx(
5289         prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
5290 
5291     // Collect mode stats for multiwinner mode processing
5292     const int txfm_search_done = 1;
5293     store_winner_mode_stats(
5294         &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
5295         NULL, bsize, rd_stats.rdcost,
5296         cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5297 
5298     if (rd_stats.rdcost < search_state->best_rd) {
5299       update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5300                           &rd_stats_uv, mode_enum, x, txfm_search_done);
5301       search_state->best_skip_rd[0] = skip_rd;
5302       // Limit the total number of modes to be evaluated if the first candidate
5303       // is valid and uses transform skip or compound prediction.
5304       if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
5305         if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
5306           // Evaluate more candidates at high quantizers where occurrence of
5307           // transform skip is high.
5308           const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
5309           const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
5310           num_inter_mode_cands =
5311               AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
5312         } else if (!j && has_second_ref(&search_state->best_mbmode)) {
5313           const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
5314           // Evaluate more candidates at low quantizers where occurrence of
5315           // single reference mode is high.
5316           const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
5317                                                 { 10, 7, 5, 3 } };
5318           const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
5319           num_inter_mode_cands = AOMMIN(
5320               max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
5321         }
5322       }
5323     }
5324     // Stop if the number of candidates evaluated exceeds max_allowed_cands and
5325     // a newmv mode has already been evaluated.
5326     if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
5327   }
5328 }
5329 
5330 // Indicates number of winner simple translation modes to be used
5331 static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5332 
5333 // Adds a motion mode to the candidate list for motion_mode_for_winner_cand
5334 // speed feature. This list consists of modes that have only searched
5335 // SIMPLE_TRANSLATION. The final list will be used to search other motion
5336 // modes after the initial RD search.
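// The candidate list is kept sorted by ascending rd_cost: the insertion point
// is found by a linear search, existing entries from that point are shifted
// down with memmove, and the list size is capped at
// max_winner_motion_mode_cand.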
5337 static void handle_winner_cand(
5338     MB_MODE_INFO *const mbmi,
5339     motion_mode_best_st_candidate *best_motion_mode_cands,
5340     int max_winner_motion_mode_cand, int64_t this_rd,
5341     motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
5342   // Number of current motion mode candidates in list
5343   const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
5344   int valid_motion_mode_cand_loc = num_motion_mode_cand;
5345 
5346   // find the best location to insert new motion mode candidate
5347   for (int j = 0; j < num_motion_mode_cand; j++) {
5348     if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
5349       valid_motion_mode_cand_loc = j;
5350       break;
5351     }
5352   }
5353 
5354   // Insert motion mode if location is found
5355   if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
5356     if (num_motion_mode_cand > 0 &&
5357         valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
5358       memmove(
5359           &best_motion_mode_cands
5360                ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
5361           &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
5362           (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
5363            valid_motion_mode_cand_loc) *
5364               sizeof(best_motion_mode_cands->motion_mode_cand[0]));
5365     motion_mode_cand->mbmi = *mbmi;
5366     motion_mode_cand->rd_cost = this_rd;
5367     motion_mode_cand->skip_motion_mode = skip_motion_mode;
5368     best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
5369         *motion_mode_cand;
5370     best_motion_mode_cands->num_motion_mode_cand =
5371         AOMMIN(max_winner_motion_mode_cand,
5372                best_motion_mode_cands->num_motion_mode_cand + 1);
5373   }
5374 }
5375 
5376 /*!\brief Search intra modes in interframes
5377  *
5378  * \ingroup intra_mode_search
5379  *
5380  * This function searches for the best intra mode when the current frame is an
5381  * interframe. This function however does *not* handle luma palette mode.
5382  * Palette mode is currently handled by \ref av1_search_palette_mode.
5383  *
5384  * This function will first iterate through the luma mode candidates to find the
5385  * best luma intra mode. Once the best luma mode is found, it will then search
5386  * for the best chroma mode. Because palette mode is currently not handled
5387  * here, a cache of the uv mode is stored in
5388  * InterModeSearchState::intra_search_state so it can be reused later by \ref
5389  * av1_search_palette_mode.
5390  *
5391  * \param[in,out] search_state      Struct keeping track of the prediction mode
5392  *                                  search state in interframe.
5393  *
5394  * \param[in]     cpi               Top-level encoder structure.
5395  * \param[in,out] x                 Pointer to struct holding all the data for
5396  *                                  the current prediction block.
5397  * \param[out]    rd_cost           Stores the best rd_cost among all the
5398  *                                  prediction modes searched.
5399  * \param[in]     bsize             Current block size.
5400  * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5401  *                                  copy the tx_type and txfm_skip arrays
5402  *                                  (Y plane only).
5403  * \param[in]     sf_args           Stores the list of intra mode candidates
5404  *                                  to be searched.
5405  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5406  *                                      current ref frame is an intra frame.
5407  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5408  *                                  terminate chroma intra mode search.
5409  *
5410  * \remark If a new best mode is found, search_state and rd_costs are updated
5411  * correspondingly. While x is also modified, it is only used as a temporary
5412  * buffer, and the final decisions are stored in search_state.
5413  */
5414 static inline void search_intra_modes_in_interframe(
5415     InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
5416     RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5417     const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
5418     int64_t yrd_threshold) {
5419   const AV1_COMMON *const cm = &cpi->common;
5420   const SPEED_FEATURES *const sf = &cpi->sf;
5421   const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
5422   MACROBLOCKD *const xd = &x->e_mbd;
5423   MB_MODE_INFO *const mbmi = xd->mi[0];
5424   IntraModeSearchState *intra_search_state = &search_state->intra_search_state;
5425 
5426   int is_best_y_mode_intra = 0;
5427   RD_STATS best_intra_rd_stats_y;
5428   int64_t best_rd_y = INT64_MAX;
5429   int best_mode_cost_y = -1;
5430   MB_MODE_INFO best_mbmi = *xd->mi[0];
5431   THR_MODES best_mode_enum = THR_INVALID;
5432   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
5433   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
5434   const int num_4x4 = bsize_to_num_blk(bsize);
5435 
5436   // Performs luma search
5437   int64_t best_model_rd = INT64_MAX;
5438   int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
5439   for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
5440     top_intra_model_rd[i] = INT64_MAX;
5441   }
5442   for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
5443     if (sf->intra_sf.skip_intra_in_interframe &&
5444         search_state->intra_search_state.skip_intra_modes)
5445       break;
5446     set_y_mode_and_delta_angle(
5447         mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
5448     assert(mbmi->mode < INTRA_MODE_END);
5449 
5450     // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
5451     if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
5452       continue;
5453 
5454     const THR_MODES mode_enum =
5455         get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
5456     if ((!intra_mode_cfg->enable_smooth_intra ||
5457          cpi->sf.intra_sf.disable_smooth_intra) &&
5458         (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
5459          mbmi->mode == SMOOTH_V_PRED))
5460       continue;
5461     if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
5462       continue;
5463     if (av1_is_directional_mode(mbmi->mode) &&
5464         !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
5465         mbmi->angle_delta[PLANE_TYPE_Y] != 0)
5466       continue;
5467     const PREDICTION_MODE this_mode = mbmi->mode;
5468 
5469     assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
5470     assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
5471     init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
5472     x->txfm_search_info.skip_txfm = 0;
5473 
5474     if (this_mode != DC_PRED) {
5475       // Only search the oblique modes if the best so far is
5476       // one of the neighboring directional modes
5477       if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
5478           (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
5479         if (search_state->best_mode_index != THR_INVALID &&
5480             search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
5481           continue;
5482       }
5483       if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
5484         if (conditional_skipintra(
5485                 this_mode, search_state->intra_search_state.best_intra_mode))
5486           continue;
5487       }
5488     }
5489 
5490     RD_STATS intra_rd_stats_y;
5491     int mode_cost_y;
5492     int64_t intra_rd_y = INT64_MAX;
5493     const int is_luma_result_valid = av1_handle_intra_y_mode(
5494         intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
5495         &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
5496         &best_model_rd, top_intra_model_rd);
5497     if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
5498       is_best_y_mode_intra = 1;
5499       if (intra_rd_y < best_rd_y) {
5500         best_intra_rd_stats_y = intra_rd_stats_y;
5501         best_mode_cost_y = mode_cost_y;
5502         best_rd_y = intra_rd_y;
5503         best_mbmi = *mbmi;
5504         best_mode_enum = mode_enum;
5505         memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
5506                sizeof(best_blk_skip[0]) * num_4x4);
5507         av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
5508       }
5509     }
5510   }
5511 
5512   if (!is_best_y_mode_intra) {
5513     return;
5514   }
5515 
5516   assert(best_rd_y < INT64_MAX);
5517 
5518   // Restores the best luma mode
5519   *mbmi = best_mbmi;
5520   memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
5521          sizeof(best_blk_skip[0]) * num_4x4);
5522   av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);
5523 
5524   // Performs chroma search
5525   RD_STATS intra_rd_stats, intra_rd_stats_uv;
5526   av1_init_rd_stats(&intra_rd_stats);
5527   av1_init_rd_stats(&intra_rd_stats_uv);
5528   const int num_planes = av1_num_planes(cm);
5529   if (num_planes > 1) {
5530     const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
5531         intra_search_state, cpi, x, bsize, &intra_rd_stats,
5532         &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);
5533 
5534     if (!intra_uv_mode_valid) {
5535       return;
5536     }
5537   }
5538 
5539   // Merge the luma and chroma rd stats
5540   assert(best_mode_cost_y >= 0);
5541   intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
5542   if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
5543     // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
5544     // in the tokenonly rate, but for intra blocks, tx_size is always coded
5545     // (prediction granularity), so we account for it in the full rate,
5546     // not the tokenonly rate.
5547     best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
5548   }
5549 
5550   const ModeCosts *mode_costs = &x->mode_costs;
5551   const PREDICTION_MODE mode = mbmi->mode;
5552   if (num_planes > 1 && xd->is_chroma_ref) {
5553     const int uv_mode_cost =
5554         mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
5555     intra_rd_stats.rate +=
5556         intra_rd_stats_uv.rate +
5557         intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
5558   }
5559 
5560   // Intra block is always coded as non-skip
5561   intra_rd_stats.skip_txfm = 0;
5562   intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
5563   // Add in the cost of the no skip flag.
5564   const int skip_ctx = av1_get_skip_txfm_context(xd);
5565   intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
5566   // Calculate the final RD estimate for this mode.
5567   const int64_t this_rd =
5568       RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
5569   // Keep record of best intra rd
5570   if (this_rd < search_state->best_intra_rd) {
5571     search_state->best_intra_rd = this_rd;
5572     intra_search_state->best_intra_mode = mode;
5573   }
5574 
5575   for (int i = 0; i < REFERENCE_MODES; ++i) {
5576     search_state->best_pred_rd[i] =
5577         AOMMIN(search_state->best_pred_rd[i], this_rd);
5578   }
5579 
5580   intra_rd_stats.rdcost = this_rd;
5581 
5582   // Collect mode stats for multiwinner mode processing
5583   const int txfm_search_done = 1;
5584   store_winner_mode_stats(
5585       &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
5586       &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
5587       cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
5588   if (intra_rd_stats.rdcost < search_state->best_rd) {
5589     update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
5590                         &best_intra_rd_stats_y, &intra_rd_stats_uv,
5591                         best_mode_enum, x, txfm_search_done);
5592   }
5593 }
5594 
5595 #if !CONFIG_REALTIME_ONLY
5596 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5597 // features in intra mode pruning.
5598 static inline void calculate_cost_from_tpl_data(const AV1_COMP *cpi,
5599                                                 MACROBLOCK *x, BLOCK_SIZE bsize,
5600                                                 int mi_row, int mi_col,
5601                                                 int64_t *inter_cost,
5602                                                 int64_t *intra_cost) {
5603   const AV1_COMMON *const cm = &cpi->common;
5604   // Only consider full SB.
5605   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5606   const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5607   const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5608                   (block_size_high[sb_size] / tpl_bsize_1d);
5609   SuperBlockEnc *sb_enc = &x->sb_enc;
5610   if (sb_enc->tpl_data_count == len) {
5611     const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5612     const int tpl_stride = sb_enc->tpl_stride;
5613     const int tplw = mi_size_wide[tpl_bsize];
5614     const int tplh = mi_size_high[tpl_bsize];
5615     const int nw = mi_size_wide[bsize] / tplw;
5616     const int nh = mi_size_high[bsize] / tplh;
5617     if (nw >= 1 && nh >= 1) {
5618       const int of_h = mi_row % mi_size_high[sb_size];
5619       const int of_w = mi_col % mi_size_wide[sb_size];
5620       const int start = of_h / tplh * tpl_stride + of_w / tplw;
5621 
5622       for (int k = 0; k < nh; k++) {
5623         for (int l = 0; l < nw; l++) {
5624           *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5625           *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5626         }
5627       }
5628       *inter_cost /= nw * nh;
5629       *intra_cost /= nw * nh;
5630     }
5631   }
5632 }
5633 #endif  // !CONFIG_REALTIME_ONLY
5634 
5635 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5636 // intra mode search.
5637 static inline void skip_intra_modes_in_interframe(
5638     AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5639     InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5640     int64_t inter_cost, int64_t intra_cost) {
5641   MACROBLOCKD *const xd = &x->e_mbd;
5642   const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5643   if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5644       bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5645     const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5646     const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5647     if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5648         x->source_variance > 128) {
5649       search_state->intra_search_state.skip_intra_modes = 1;
5650       return;
5651     }
5652   }
5653 
5654   const unsigned int src_var_thresh_intra_skip = 1;
5655   const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5656   if (!(skip_intra_in_interframe &&
5657         (x->source_variance > src_var_thresh_intra_skip)))
5658     return;
5659 
5660   // Prune intra search based on the best inter mode being transform skip.
5661   if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5662     const int qindex_thresh[2] = { 200, MAXQ };
5663     const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5664     if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5665         (x->qindex <= qindex_thresh[ind])) {
5666       search_state->intra_search_state.skip_intra_modes = 1;
5667       return;
5668     } else if ((skip_intra_in_interframe >= 4) &&
5669                (inter_cost < 0 || intra_cost < 0)) {
5670       search_state->intra_search_state.skip_intra_modes = 1;
5671       return;
5672     }
5673   }
5674   // Use ML model to prune intra search.
5675   if (inter_cost >= 0 && intra_cost >= 0) {
5676     const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5677                                      ? &av1_intrap_nn_config
5678                                      : &av1_intrap_hd_nn_config;
5679     float nn_features[6];
5680     float scores[2] = { 0.0f };
5681 
5682     nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5683     nn_features[1] = (float)mi_size_wide_log2[bsize];
5684     nn_features[2] = (float)mi_size_high_log2[bsize];
5685     nn_features[3] = (float)intra_cost;
5686     nn_features[4] = (float)inter_cost;
5687     const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5688     const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5689     nn_features[5] = (float)(ac_q_max / ac_q);
5690 
5691     av1_nn_predict(nn_features, nn_config, 1, scores);
5692 
5693     // For two parameters, the max prob returned from av1_nn_softmax equals
5694     // 1.0 / (1.0 + e^(-|diff_score|)). Here the scores are used directly to
5695     // avoid calling av1_nn_softmax.
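    // With the 1.4 thresholds used below, intra search is skipped only when
    // the winning score's softmax probability exceeds roughly
    // 1.0 / (1.0 + e^(-1.4)) ~= 0.80.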
5696     const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5697     assert(skip_intra_in_interframe <= 5);
5698     if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5699       search_state->intra_search_state.skip_intra_modes = 1;
5700     }
5701   }
5702 }
5703 
5704 static inline bool skip_interp_filter_search(const AV1_COMP *cpi,
5705                                              int is_single_pred) {
5706   const MODE encoding_mode = cpi->oxcf.mode;
5707   if (encoding_mode == REALTIME) {
5708     return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5709             (cpi->sf.interp_sf.skip_interp_filter_search ||
5710              cpi->sf.winner_mode_sf.winner_mode_ifs));
5711   } else if (encoding_mode == GOOD) {
5712     // Skip interpolation filter search for single prediction modes.
5713     return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5714   }
5715   return false;
5716 }
5717 
5718 static inline int get_block_temp_var(const AV1_COMP *cpi, const MACROBLOCK *x,
5719                                      BLOCK_SIZE bsize) {
5720   const AV1_COMMON *const cm = &cpi->common;
5721   const SPEED_FEATURES *const sf = &cpi->sf;
5722 
5723   if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5724       !sf->rt_sf.short_circuit_low_temp_var ||
5725       !sf->rt_sf.prune_inter_modes_using_temp_var) {
5726     return 0;
5727   }
5728 
5729   const int mi_row = x->e_mbd.mi_row;
5730   const int mi_col = x->e_mbd.mi_col;
5731   int is_low_temp_var = 0;
5732 
5733   if (cm->seq_params->sb_size == BLOCK_64X64)
5734     is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5735         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5736   else
5737     is_low_temp_var = av1_get_force_skip_low_temp_var(
5738         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5739 
5740   return is_low_temp_var;
5741 }
5742 
5743 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
5744 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5745                             struct macroblock *x, struct RD_STATS *rd_cost,
5746                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5747                             int64_t best_rd_so_far) {
5748   AV1_COMMON *const cm = &cpi->common;
5749   const FeatureFlags *const features = &cm->features;
5750   const int num_planes = av1_num_planes(cm);
5751   const SPEED_FEATURES *const sf = &cpi->sf;
5752   MACROBLOCKD *const xd = &x->e_mbd;
5753   MB_MODE_INFO *const mbmi = xd->mi[0];
5754   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5755   int i;
5756   const ModeCosts *mode_costs = &x->mode_costs;
5757   const int *comp_inter_cost =
5758       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5759 
5760   InterModeSearchState search_state;
5761   init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
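  // INTERINTRA_MODES is the enum count value; it presumably serves here as a
  // "not yet decided" sentinel for each ref frame's cached inter-intra mode.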
5762   INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5763     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5764     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5765   };
5766   HandleInterModeArgs args = { { NULL },
5767                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5768                                { NULL },
5769                                { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5770                                  MAX_SB_SIZE >> 1 },
5771                                NULL,
5772                                NULL,
5773                                NULL,
5774                                search_state.modelled_rd,
5775                                INT_MAX,
5776                                INT_MAX,
5777                                search_state.simple_rd,
5778                                0,
5779                                false,
5780                                interintra_modes,
5781                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5782                                { { 0, 0 } },
5783                                { 0 },
5784                                0,
5785                                0,
5786                                -1,
5787                                -1,
5788                                -1,
5789                                { 0 },
5790                                { 0 },
5791                                UINT_MAX };
5792   // Currently, is_low_temp_var is used in real time encoding.
5793   const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5794 
5795   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5796   // Indicates the appropriate number of simple translation winner modes for
5797   // exhaustive motion mode evaluation
5798   const int max_winner_motion_mode_cand =
5799       num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5800   assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5801   motion_mode_candidate motion_mode_cand;
5802   motion_mode_best_st_candidate best_motion_mode_cands;
5803   // Initializing the number of motion mode candidates to zero.
5804   best_motion_mode_cands.num_motion_mode_cand = 0;
5805   for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5806     best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5807 
5808   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5809 
5810   av1_invalid_rd_stats(rd_cost);
5811 
5812   for (i = 0; i < REF_FRAMES; ++i) {
5813     x->warp_sample_info[i].num = -1;
5814   }
5815 
5816   // Ref frames that are selected by square partition blocks.
5817   int picked_ref_frames_mask = 0;
5818   if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5819       mbmi->partition != PARTITION_NONE) {
5820     // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5821     // partition blocks. prune_ref_frame_for_rect_partitions >=2
5822     // implies prune for vert, horiz and extended partition blocks.
5823     if ((mbmi->partition != PARTITION_VERT &&
5824          mbmi->partition != PARTITION_HORZ) ||
5825         sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5826       picked_ref_frames_mask =
5827           fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5828     }
5829   }
5830 
5831 #if CONFIG_COLLECT_COMPONENT_TIMING
5832   start_timing(cpi, set_params_rd_pick_inter_mode_time);
5833 #endif
5834   // Skip ref frames that were never selected by square blocks.
5835   const int skip_ref_frame_mask =
5836       picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5837   mode_skip_mask_t mode_skip_mask;
5838   unsigned int ref_costs_single[REF_FRAMES];
5839   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5840   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5841   // init params, set frame modes, speed features
5842   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5843                                 skip_ref_frame_mask, ref_costs_single,
5844                                 ref_costs_comp, yv12_mb);
5845 #if CONFIG_COLLECT_COMPONENT_TIMING
5846   end_timing(cpi, set_params_rd_pick_inter_mode_time);
5847 #endif
5848 
5849   int64_t best_est_rd = INT64_MAX;
5850   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5851   // If do_tx_search is 0, only estimated RD should be computed.
5852   // If do_tx_search is 1, all modes have TX search performed.
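  // With inter_mode_rd_model_estimation == 2, the estimate is used only for
  // blocks where num_pels_log2_lookup[bsize] > 8, i.e. blocks containing more
  // than 256 pixels (larger than 16x16).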
5853   const int do_tx_search =
5854       !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5855         (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5856          num_pels_log2_lookup[bsize] > 8));
5857   InterModesInfo *inter_modes_info = x->inter_modes_info;
5858   inter_modes_info->num = 0;
5859 
5860   // Temporary buffers used by handle_inter_mode().
5861   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5862 
5863   // The best RD found for the reference frame, among single reference modes.
5864   // Note that the 0-th element will contain a cut-off that is later used
5865   // to determine if we should skip a compound mode.
5866   int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5867                                        INT64_MAX, INT64_MAX, INT64_MAX,
5868                                        INT64_MAX, INT64_MAX };
5869 
5870   // Prepared stats used later to check if we could skip intra mode eval.
5871   int64_t inter_cost = -1;
5872   int64_t intra_cost = -1;
5873   // Need to tweak the threshold for hdres speed 0 & 1.
5874   const int mi_row = xd->mi_row;
5875   const int mi_col = xd->mi_col;
5876 
5877   // Obtain the relevant tpl stats for pruning inter modes
5878   PruneInfoFromTpl inter_cost_info_from_tpl;
5879 #if !CONFIG_REALTIME_ONLY
5880   if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5881     // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5882     // prune_ref_by_selective_ref_frame()
5883     // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5884     // prune_ref_by_selective_ref_frame()
5885     // Populating valid_refs[idx] = 1 ensures that
5886     // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5887     // pruned ref frame.
5888     int valid_refs[INTER_REFS_PER_FRAME];
5889     for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5890       const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5891       valid_refs[frame - 1] =
5892           x->tpl_keep_ref_frame[frame] ||
5893           !prune_ref_by_selective_ref_frame(
5894               cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5895     }
5896     av1_zero(inter_cost_info_from_tpl);
5897     get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5898                               &inter_cost_info_from_tpl);
5899   }
5900 
5901   const int do_pruning =
5902       (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5903   if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5904       cpi->oxcf.algo_cfg.enable_tpl_model)
5905     calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5906                                  &intra_cost);
5907 #endif  // !CONFIG_REALTIME_ONLY
5908 
5909   // Initialize best mode stats for winner mode processing.
5910   const int max_winner_mode_count =
5911       winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5912   zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5913   x->winner_mode_count = 0;
5914   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5915                           NULL, bsize, best_rd_so_far,
5916                           sf->winner_mode_sf.multi_winner_mode_type, 0);
5917 
5918   int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5919   if (sf->inter_sf.prune_inter_modes_if_skippable) {
5920     // Higher multiplication factor values for lower quantizers.
5921     mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5922   }
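  // mode_thresh_mul_fact is a Q(MODE_THRESH_QBITS) fixed-point multiplier;
  // (1 << MODE_THRESH_QBITS) corresponds to 1.0, i.e. mode thresholds are left
  // unscaled unless prune_inter_modes_if_skippable is enabled.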
5923 
5924   // Initialize arguments for mode loop speed features
5925   InterModeSFArgs sf_args = { &args.skip_motion_mode,
5926                               &mode_skip_mask,
5927                               &search_state,
5928                               skip_ref_frame_mask,
5929                               0,
5930                               mode_thresh_mul_fact,
5931                               0,
5932                               0 };
5933   int64_t best_inter_yrd = INT64_MAX;
5934 
5935   // This is the main loop of this function. It loops over all possible inter
5936   // modes and calls handle_inter_mode() to compute the RD for each.
5937   // Here midx is just an iterator index that should not be used by itself
5938   // except to keep track of the number of modes searched. It should be used
5939   // with av1_default_mode_order to get the enum that defines the mode, which
5940   // can be used with av1_mode_defs to get the prediction mode and the ref
5941   // frames.
5942   // TODO(yunqing, any): Setting mode_start and mode_end outside the for-loop
5943   // brings a good speedup for the real-time case. If we decide to use compound
5944   // modes in real time, we may need to modify the av1_default_mode_order table.
5945   THR_MODES mode_start = THR_INTER_MODE_START;
5946   THR_MODES mode_end = THR_INTER_MODE_END;
5947   const CurrentFrame *const current_frame = &cm->current_frame;
5948   if (current_frame->reference_mode == SINGLE_REFERENCE) {
5949     mode_start = SINGLE_REF_MODE_START;
5950     mode_end = SINGLE_REF_MODE_END;
5951   }
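  // For SINGLE_REFERENCE frames only the single-reference portion of the mode
  // table is visited, so compound modes are never considered in the loop below.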
5952 
5953   for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5954     // Get the actual prediction mode we are trying in this iteration
5955     const THR_MODES mode_enum = av1_default_mode_order[midx];
5956     const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5957     const PREDICTION_MODE this_mode = mode_def->mode;
5958     const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5959 
5960     const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5961     const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5962     const int is_single_pred =
5963         ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5964     const int comp_pred = second_ref_frame > INTRA_FRAME;
5965 
5966     init_mbmi(mbmi, this_mode, ref_frames, cm);
5967 
5968     txfm_info->skip_txfm = 0;
5969     sf_args.num_single_modes_processed += is_single_pred;
5970     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5971 #if CONFIG_COLLECT_COMPONENT_TIMING
5972     start_timing(cpi, skip_inter_mode_time);
5973 #endif
5974     // Apply speed features to decide if this inter mode can be skipped
5975     const int is_skip_inter_mode = skip_inter_mode(
5976         cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5977 #if CONFIG_COLLECT_COMPONENT_TIMING
5978     end_timing(cpi, skip_inter_mode_time);
5979 #endif
5980     if (is_skip_inter_mode) continue;
5981 
5982     // Select prediction reference frames.
5983     for (i = 0; i < num_planes; i++) {
5984       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5985       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5986     }
5987 
5988     mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5989     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5990     mbmi->filter_intra_mode_info.use_filter_intra = 0;
5991     mbmi->ref_mv_idx = 0;
5992 
5993     const int64_t ref_best_rd = search_state.best_rd;
5994     RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5995     av1_init_rd_stats(&rd_stats);
5996 
5997     const int ref_frame_cost = comp_pred
5998                                    ? ref_costs_comp[ref_frame][second_ref_frame]
5999                                    : ref_costs_single[ref_frame];
6000     const int compmode_cost =
6001         is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6002     const int real_compmode_cost =
6003         cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6004             ? compmode_cost
6005             : 0;
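    // The compound-vs-single selection flag is only signalled when the frame
    // uses REFERENCE_MODE_SELECT, so its cost is only charged in that case.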
6006     // Point to variables that are maintained between loop iterations
6007     args.single_newmv = search_state.single_newmv;
6008     args.single_newmv_rate = search_state.single_newmv_rate;
6009     args.single_newmv_valid = search_state.single_newmv_valid;
6010     args.single_comp_cost = real_compmode_cost;
6011     args.ref_frame_cost = ref_frame_cost;
6012     args.best_pred_sse = search_state.best_pred_sse;
6013     args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6014 
6015     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6016                            search_state.best_skip_rd[1] };
6017     int64_t this_yrd = INT64_MAX;
6018 #if CONFIG_COLLECT_COMPONENT_TIMING
6019     start_timing(cpi, handle_inter_mode_time);
6020 #endif
6021     int64_t this_rd = handle_inter_mode(
6022         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6023         ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6024         inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6025         &this_yrd);
6026 #if CONFIG_COLLECT_COMPONENT_TIMING
6027     end_timing(cpi, handle_inter_mode_time);
6028 #endif
6029     if (current_frame->reference_mode != SINGLE_REFERENCE) {
6030       if (!args.skip_ifs &&
6031           sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6032           is_inter_singleref_mode(this_mode)) {
6033         collect_single_states(x, &search_state, mbmi);
6034       }
6035 
6036       if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6037           is_inter_singleref_mode(this_mode))
6038         update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6039     }
6040 
6041     if (this_rd == INT64_MAX) continue;
6042 
6043     if (mbmi->skip_txfm) {
6044       rd_stats_y.rate = 0;
6045       rd_stats_uv.rate = 0;
6046     }
6047 
6048     if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6049         this_rd < ref_frame_rd[ref_frame]) {
6050       ref_frame_rd[ref_frame] = this_rd;
6051     }
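    // ref_frame_rd[ref_frame] now holds the best single-reference RD seen for
    // this reference; skip_inter_mode() (called above with ref_frame_rd) uses
    // it to prune compound modes when prune_compound_using_single_ref is set.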
6052 
6053     // Did this mode help, i.e., is it the new best mode?
6054     if (this_rd < search_state.best_rd) {
6055       assert(IMPLIES(comp_pred,
6056                      cm->current_frame.reference_mode != SINGLE_REFERENCE));
6057       search_state.best_pred_sse = x->pred_sse[ref_frame];
6058       best_inter_yrd = this_yrd;
6059       update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6060                           &rd_stats_uv, mode_enum, x, do_tx_search);
6061       if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6062       // skip_rd[0] is the best total rd for a skip mode so far.
6063       // skip_rd[1] is the best total rd for a skip mode so far in luma.
6064       // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6065       // When do_tx_search = 0, only skip_rd[1] is updated.
6066       search_state.best_skip_rd[1] = skip_rd[1];
6067     }
6068     if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6069       // Add this mode to the motion mode candidate list for motion mode search
6070       // when the motion_mode_for_winner_cand speed feature is enabled.
6071       handle_winner_cand(mbmi, &best_motion_mode_cands,
6072                          max_winner_motion_mode_cand, this_rd,
6073                          &motion_mode_cand, args.skip_motion_mode);
6074     }
6075 
6076     /* keep record of best compound/single-only prediction */
6077     record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6078                          x->rdmult, &search_state, compmode_cost);
6079   }
6080 
6081 #if CONFIG_COLLECT_COMPONENT_TIMING
6082   start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6083 #endif
6084   if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6085     // For the single ref winner candidates, evaluate other motion modes (non
6086     // simple translation).
6087     evaluate_motion_mode_for_winner_candidates(
6088         cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6089         &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6090         &search_state, &best_inter_yrd);
6091   }
6092 #if CONFIG_COLLECT_COMPONENT_TIMING
6093   end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6094 #endif
6095 
6096 #if CONFIG_COLLECT_COMPONENT_TIMING
6097   start_timing(cpi, do_tx_search_time);
6098 #endif
6099   if (do_tx_search != 1) {
6100     // A full tx search has not yet been done; do the tx search for the
6101     // top mode candidates.
6102     tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6103                                     yv12_mb, mi_row, mi_col, &search_state,
6104                                     rd_cost, ctx, &best_inter_yrd);
6105   }
6106 #if CONFIG_COLLECT_COMPONENT_TIMING
6107   end_timing(cpi, do_tx_search_time);
6108 #endif
6109 
6110 #if CONFIG_COLLECT_COMPONENT_TIMING
6111   start_timing(cpi, handle_intra_mode_time);
6112 #endif
6113   // Gate intra mode evaluation when the best inter mode is skip, except when
6114   // the source variance is extremely low, and also based on the max intra bsize.
6115   skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6116                                  intra_cost);
6117 
6118   const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6119   search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6120                                    &sf_args, intra_ref_frame_cost,
6121                                    best_inter_yrd);
6122 #if CONFIG_COLLECT_COMPONENT_TIMING
6123   end_timing(cpi, handle_intra_mode_time);
6124 #endif
6125 
6126 #if CONFIG_COLLECT_COMPONENT_TIMING
6127   start_timing(cpi, refine_winner_mode_tx_time);
6128 #endif
6129   int winner_mode_count =
6130       sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6131   // In effect only when fast tx search speed features are enabled.
6132   refine_winner_mode_tx(
6133       cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6134       &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6135       search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6136 #if CONFIG_COLLECT_COMPONENT_TIMING
6137   end_timing(cpi, refine_winner_mode_tx_time);
6138 #endif
6139 
6140   // Initialize default mode evaluation params
6141   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6142 
6143   // Only try palette mode when the best mode so far is an intra mode.
6144   const int try_palette =
6145       cpi->oxcf.tool_cfg.enable_palette &&
6146       av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6147       !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6148   RD_STATS this_rd_cost;
6149   int this_skippable = 0;
6150   if (try_palette) {
6151 #if CONFIG_COLLECT_COMPONENT_TIMING
6152     start_timing(cpi, av1_search_palette_mode_time);
6153 #endif
6154     this_skippable = av1_search_palette_mode(
6155         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6156         ctx, &this_rd_cost, search_state.best_rd);
6157 #if CONFIG_COLLECT_COMPONENT_TIMING
6158     end_timing(cpi, av1_search_palette_mode_time);
6159 #endif
6160     if (this_rd_cost.rdcost < search_state.best_rd) {
6161       search_state.best_mode_index = THR_DC;
6162       mbmi->mv[0].as_int = 0;
6163       rd_cost->rate = this_rd_cost.rate;
6164       rd_cost->dist = this_rd_cost.dist;
6165       rd_cost->rdcost = this_rd_cost.rdcost;
6166       search_state.best_rd = rd_cost->rdcost;
6167       search_state.best_mbmode = *mbmi;
6168       search_state.best_skip2 = 0;
6169       search_state.best_mode_skippable = this_skippable;
6170       memcpy(ctx->blk_skip, txfm_info->blk_skip,
6171              sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6172       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6173     }
6174   }
6175 
6176   search_state.best_mbmode.skip_mode = 0;
6177   if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6178       is_comp_ref_allowed(bsize)) {
6179     const struct segmentation *const seg = &cm->seg;
6180     unsigned char segment_id = mbmi->segment_id;
6181     if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6182       rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6183     }
6184   }
6185 
6186   // Make sure that the ref_mv_idx is only nonzero when we're
6187   // using a mode which can support ref_mv_idx
6188   if (search_state.best_mbmode.ref_mv_idx != 0 &&
6189       !(search_state.best_mbmode.mode == NEWMV ||
6190         search_state.best_mbmode.mode == NEW_NEWMV ||
6191         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6192     search_state.best_mbmode.ref_mv_idx = 0;
6193   }
6194 
6195   if (search_state.best_mode_index == THR_INVALID ||
6196       search_state.best_rd >= best_rd_so_far) {
6197     rd_cost->rate = INT_MAX;
6198     rd_cost->rdcost = INT64_MAX;
6199     return;
6200   }
6201 
6202   const InterpFilter interp_filter = features->interp_filter;
6203   assert((interp_filter == SWITCHABLE) ||
6204          (interp_filter ==
6205           search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6206          !is_inter_block(&search_state.best_mbmode));
6207   assert((interp_filter == SWITCHABLE) ||
6208          (interp_filter ==
6209           search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6210          !is_inter_block(&search_state.best_mbmode));
6211 
6212   if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6213     av1_update_rd_thresh_fact(
6214         cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6215         search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6216   }
6217 
6218   // macroblock modes
6219   *mbmi = search_state.best_mbmode;
6220   txfm_info->skip_txfm |= search_state.best_skip2;
6221 
6222   // Note: this section is needed since the mode may have been forced to
6223   // GLOBALMV by the all-zero mode handling of ref-mv.
6224   if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6225     // Correct the interp filters for GLOBALMV
6226     if (is_nontrans_global_motion(xd, xd->mi[0])) {
6227       int_interpfilters filters =
6228           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6229       assert(mbmi->interp_filters.as_int == filters.as_int);
6230       (void)filters;
6231     }
6232   }
6233 
6234   txfm_info->skip_txfm |= search_state.best_mode_skippable;
6235 
6236   assert(search_state.best_mode_index != THR_INVALID);
6237 
6238 #if CONFIG_INTERNAL_STATS
6239   store_coding_context(x, ctx, search_state.best_mode_index,
6240                        search_state.best_mode_skippable);
6241 #else
6242   store_coding_context(x, ctx, search_state.best_mode_skippable);
6243 #endif  // CONFIG_INTERNAL_STATS
6244 
6245   if (mbmi->palette_mode_info.palette_size[1] > 0) {
6246     assert(try_palette);
6247     av1_restore_uv_color_map(cpi, x);
6248   }
6249 }
6250 
6251 void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
6252                                         TileDataEnc *tile_data, MACROBLOCK *x,
6253                                         int mi_row, int mi_col,
6254                                         RD_STATS *rd_cost, BLOCK_SIZE bsize,
6255                                         PICK_MODE_CONTEXT *ctx,
6256                                         int64_t best_rd_so_far) {
6257   const AV1_COMMON *const cm = &cpi->common;
6258   const FeatureFlags *const features = &cm->features;
6259   MACROBLOCKD *const xd = &x->e_mbd;
6260   MB_MODE_INFO *const mbmi = xd->mi[0];
6261   unsigned char segment_id = mbmi->segment_id;
6262   const int comp_pred = 0;
6263   int i;
6264   unsigned int ref_costs_single[REF_FRAMES];
6265   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
6266   const ModeCosts *mode_costs = &x->mode_costs;
6267   const int *comp_inter_cost =
6268       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
6269   InterpFilter best_filter = SWITCHABLE;
6270   int64_t this_rd = INT64_MAX;
6271   int rate2 = 0;
6272   const int64_t distortion2 = 0;
6273   (void)mi_row;
6274   (void)mi_col;
6275   (void)tile_data;
6276 
6277   av1_collect_neighbors_ref_counts(xd);
6278 
6279   estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
6280                            ref_costs_comp);
6281 
6282   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
6283   for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
6284 
6285   rd_cost->rate = INT_MAX;
6286 
6287   assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
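  // For a SEG_LVL_SKIP segment the block is coded as GLOBALMV with
  // SIMPLE_TRANSLATION and no residual, so the remainder of this function only
  // estimates the mode, reference and interpolation-filter signalling cost.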
6288 
6289   mbmi->palette_mode_info.palette_size[0] = 0;
6290   mbmi->palette_mode_info.palette_size[1] = 0;
6291   mbmi->filter_intra_mode_info.use_filter_intra = 0;
6292   mbmi->mode = GLOBALMV;
6293   mbmi->motion_mode = SIMPLE_TRANSLATION;
6294   mbmi->uv_mode = UV_DC_PRED;
6295   if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
6296     mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
6297   else
6298     mbmi->ref_frame[0] = LAST_FRAME;
6299   mbmi->ref_frame[1] = NONE_FRAME;
6300   mbmi->mv[0].as_int =
6301       gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
6302                            features->allow_high_precision_mv, bsize, mi_col,
6303                            mi_row, features->cur_frame_force_integer_mv)
6304           .as_int;
6305   mbmi->tx_size = max_txsize_lookup[bsize];
6306   x->txfm_search_info.skip_txfm = 1;
6307 
6308   mbmi->ref_mv_idx = 0;
6309 
6310   mbmi->motion_mode = SIMPLE_TRANSLATION;
6311   av1_count_overlappable_neighbors(cm, xd);
6312   if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
6313     int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
6314     mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
6315     // Select the samples according to motion vector difference
6316     if (mbmi->num_proj_ref > 1) {
6317       mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
6318                                              mbmi->num_proj_ref, bsize);
6319     }
6320   }
6321 
6322   const InterpFilter interp_filter = features->interp_filter;
6323   set_default_interp_filters(mbmi, interp_filter);
6324 
6325   if (interp_filter != SWITCHABLE) {
6326     best_filter = interp_filter;
6327   } else {
6328     best_filter = EIGHTTAP_REGULAR;
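    // distortion2 is fixed at 0 in this path, so the search below simply picks
    // the switchable filter with the smallest signalling rate.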
6329     if (av1_is_interp_needed(xd)) {
6330       int rs;
6331       int best_rs = INT_MAX;
6332       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
6333         mbmi->interp_filters = av1_broadcast_interp_filter(i);
6334         rs = av1_get_switchable_rate(x, xd, interp_filter,
6335                                      cm->seq_params->enable_dual_filter);
6336         if (rs < best_rs) {
6337           best_rs = rs;
6338           best_filter = mbmi->interp_filters.as_filters.y_filter;
6339         }
6340       }
6341     }
6342   }
6343   // Set the appropriate filter
6344   mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
6345   rate2 += av1_get_switchable_rate(x, xd, interp_filter,
6346                                    cm->seq_params->enable_dual_filter);
6347 
6348   if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
6349     rate2 += comp_inter_cost[comp_pred];
6350 
6351   // Estimate the reference frame signaling cost and add it
6352   // to the rolling cost variable.
6353   rate2 += ref_costs_single[LAST_FRAME];
6354   this_rd = RDCOST(x->rdmult, rate2, distortion2);
6355 
6356   rd_cost->rate = rate2;
6357   rd_cost->dist = distortion2;
6358   rd_cost->rdcost = this_rd;
6359 
6360   if (this_rd >= best_rd_so_far) {
6361     rd_cost->rate = INT_MAX;
6362     rd_cost->rdcost = INT64_MAX;
6363     return;
6364   }
6365 
6366   assert((interp_filter == SWITCHABLE) ||
6367          (interp_filter == mbmi->interp_filters.as_filters.y_filter));
6368 
6369   if (cpi->sf.inter_sf.adaptive_rd_thresh) {
6370     av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
6371                               cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
6372                               THR_GLOBALMV, THR_INTER_MODE_START,
6373                               THR_INTER_MODE_END, THR_DC, MAX_MODES);
6374   }
6375 
6376 #if CONFIG_INTERNAL_STATS
6377   store_coding_context(x, ctx, THR_GLOBALMV, 0);
6378 #else
6379   store_coding_context(x, ctx, 0);
6380 #endif  // CONFIG_INTERNAL_STATS
6381 }
6382 
6383 /*!\cond */
6384 struct calc_target_weighted_pred_ctxt {
6385   const OBMCBuffer *obmc_buffer;
6386   const uint8_t *tmp;
6387   int tmp_stride;
6388   int overlap;
6389 };
6390 /*!\endcond */
6391 
6392 static inline void calc_target_weighted_pred_above(
6393     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6394     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6395   (void)nb_mi;
6396   (void)num_planes;
6397   (void)rel_mi_row;
6398   (void)dir;
6399 
6400   struct calc_target_weighted_pred_ctxt *ctxt =
6401       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6402 
6403   const int bw = xd->width << MI_SIZE_LOG2;
6404   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6405 
6406   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6407   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6408   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6409   const int is_hbd = is_cur_buf_hbd(xd);
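  // Above-neighbour pass: within the overlap region this sets
  // wsrc(x, y) = Cv(y) * Pabove(x, y) and mask(x, y) = Mv(y), following the
  // derivation in the comment above calc_target_weighted_pred() below.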
6410 
6411   if (!is_hbd) {
6412     for (int row = 0; row < ctxt->overlap; ++row) {
6413       const uint8_t m0 = mask1d[row];
6414       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6415       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6416         wsrc[col] = m1 * tmp[col];
6417         mask[col] = m0;
6418       }
6419       wsrc += bw;
6420       mask += bw;
6421       tmp += ctxt->tmp_stride;
6422     }
6423   } else {
6424     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6425 
6426     for (int row = 0; row < ctxt->overlap; ++row) {
6427       const uint8_t m0 = mask1d[row];
6428       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6429       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6430         wsrc[col] = m1 * tmp16[col];
6431         mask[col] = m0;
6432       }
6433       wsrc += bw;
6434       mask += bw;
6435       tmp16 += ctxt->tmp_stride;
6436     }
6437   }
6438 }
6439 
6440 static inline void calc_target_weighted_pred_left(
6441     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6442     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6443   (void)nb_mi;
6444   (void)num_planes;
6445   (void)rel_mi_col;
6446   (void)dir;
6447 
6448   struct calc_target_weighted_pred_ctxt *ctxt =
6449       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6450 
6451   const int bw = xd->width << MI_SIZE_LOG2;
6452   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6453 
6454   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6455   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6456   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6457   const int is_hbd = is_cur_buf_hbd(xd);
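  // Left-neighbour pass: wsrc currently holds AOM_BLEND_A64_MAX_ALPHA * Cv(y) *
  // Pabove(x, y) (or 0 where there is no above overlap), so the shift by
  // AOM_BLEND_A64_ROUND_BITS removes that scaling. The result is
  // Mh(x) * Cv(y) * Pabove + AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft, and mask
  // becomes Mh(x) * Mv(y).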
6458 
6459   if (!is_hbd) {
6460     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6461       for (int col = 0; col < ctxt->overlap; ++col) {
6462         const uint8_t m0 = mask1d[col];
6463         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6464         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6465                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6466         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6467       }
6468       wsrc += bw;
6469       mask += bw;
6470       tmp += ctxt->tmp_stride;
6471     }
6472   } else {
6473     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6474 
6475     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6476       for (int col = 0; col < ctxt->overlap; ++col) {
6477         const uint8_t m0 = mask1d[col];
6478         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6479         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6480                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6481         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6482       }
6483       wsrc += bw;
6484       mask += bw;
6485       tmp16 += ctxt->tmp_stride;
6486     }
6487   }
6488 }
6489 
6490 // This function has a structure similar to av1_build_obmc_inter_prediction
6491 //
6492 // The OBMC predictor is computed as:
6493 //
6494 //  PObmc(x,y) =
6495 //    AOM_BLEND_A64(Mh(x),
6496 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6497 //                  PLeft(x, y))
6498 //
6499 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6500 // rounding, this can be written as:
6501 //
6502 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6503 //    Mh(x) * Mv(y) * P(x,y) +
6504 //      Mh(x) * Cv(y) * Pabove(x,y) +
6505 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6506 //
6507 // Where:
6508 //
6509 //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6510 //  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6511 //
6512 // This function computes 'wsrc' and 'mask' as:
6513 //
6514 //  wsrc(x, y) =
6515 //    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6516 //      Mh(x) * Cv(y) * Pabove(x,y) -
6517 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6518 //
6519 //  mask(x, y) = Mh(x) * Mv(y)
6520 //
6521 // These can then be used to efficiently approximate the error for any
6522 // predictor P in the context of the provided neighbouring predictors by
6523 // computing:
6524 //
6525 //  error(x, y) =
6526 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6527 //
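// As a quick sanity check (assuming AOM_BLEND_A64_MAX_ALPHA == 64): for a
// pixel inside the above overlap only, with Mh(x) = 64 (so Ch(x) = 0) and
// Mv(y) = 40 (so Cv(y) = 24):
//
//  mask = 64 * 40 = 2560
//  wsrc = 4096 * src - 64 * 24 * Pabove = 4096 * src - 1536 * Pabove
//
// and wsrc - mask * P = 4096 * (src - (2560 * P + 1536 * Pabove) / 4096),
// i.e. 4096 times the true OBMC prediction error for candidate predictor P.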
6528 static inline void calc_target_weighted_pred(
6529     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6530     const uint8_t *above, int above_stride, const uint8_t *left,
6531     int left_stride) {
6532   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6533   const int bw = xd->width << MI_SIZE_LOG2;
6534   const int bh = xd->height << MI_SIZE_LOG2;
6535   const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6536   int32_t *mask_buf = obmc_buffer->mask;
6537   int32_t *wsrc_buf = obmc_buffer->wsrc;
6538 
6539   const int is_hbd = is_cur_buf_hbd(xd);
6540   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6541 
6542   // plane 0 should not be sub-sampled
6543   assert(xd->plane[0].subsampling_x == 0);
6544   assert(xd->plane[0].subsampling_y == 0);
6545 
6546   av1_zero_array(wsrc_buf, bw * bh);
6547   for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
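  // At this point wsrc is all zero and mask is AOM_BLEND_A64_MAX_ALPHA
  // everywhere, i.e. full weight on the candidate predictor; the neighbour
  // passes below overwrite only their overlap regions.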
6548 
6549   // handle above row
6550   if (xd->up_available) {
6551     const int overlap =
6552         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6553     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6554                                                    above_stride, overlap };
6555     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6556                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
6557                                   calc_target_weighted_pred_above, &ctxt);
6558   }
6559 
6560   for (int i = 0; i < bw * bh; ++i) {
6561     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6562     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563   }
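  // The scaling above brings wsrc/mask into the AOM_BLEND_A64_MAX_ALPHA domain
  // expected by the left pass. Outside any overlap, mask ends up at
  // AOM_BLEND_A64_MAX_ALPHA^2, which corresponds to Mh = Mv = MAX_ALPHA.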
6564 
6565   // handle left column
6566   if (xd->left_available) {
6567     const int overlap =
6568         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6569     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6570                                                    left_stride, overlap };
6571     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6572                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
6573                                  calc_target_weighted_pred_left, &ctxt);
6574   }
6575 
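  // Finally fold in the source samples:
  // wsrc(x, y) = AOM_BLEND_A64_MAX_ALPHA^2 * src(x, y) minus the accumulated
  // neighbour terms, matching the formula in the comment above this function.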
6576   if (!is_hbd) {
6577     const uint8_t *src = x->plane[0].src.buf;
6578 
6579     for (int row = 0; row < bh; ++row) {
6580       for (int col = 0; col < bw; ++col) {
6581         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6582       }
6583       wsrc_buf += bw;
6584       src += x->plane[0].src.stride;
6585     }
6586   } else {
6587     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6588 
6589     for (int row = 0; row < bh; ++row) {
6590       for (int col = 0; col < bw; ++col) {
6591         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6592       }
6593       wsrc_buf += bw;
6594       src += x->plane[0].src.stride;
6595     }
6596   }
6597 }
6598