• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44 
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 #include "av1/encoder/var_based_part.h"
73 
74 #define LAST_NEW_MV_INDEX 6
75 
76 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
77 // The values are kept in Q12 format and equation used to derive is
78 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
79 #define MODE_THRESH_QBITS 12
// Table has QINDEX_RANGE (256) entries; the value at index q equals
// round((2.5 - 1.5 * q / MAXQ) * (1 << MODE_THRESH_QBITS)), i.e. 10240 at
// q = 0 down to 4096 at q = MAXQ.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106 
// Order in which candidate prediction modes are evaluated: the
// single-reference inter modes (NEARESTMV, NEWMV, NEARMV, GLOBALMV) over
// each reference frame, then the compound NEAREST_NEAREST modes, then the
// remaining compound modes grouped per reference pair, and finally the
// intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV, one entry per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST over all reference pairs.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, reference pair LB.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // Reference pair LA.
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // Reference pair L2A.
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // Reference pair L3A.
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // Reference pair GA.
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // Reference pair L2B.
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // Reference pair L3B.
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // Reference pair GB.
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // Reference pair LA2.
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // Reference pair L2A2.
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // Reference pair L3A2.
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // Reference pair GA2.
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Reference pair LL2.
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // Reference pair LL3.
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // Reference pair LG.
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // Reference pair BA.
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, evaluated last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299 
/*!\cond */
// One single-reference inter-mode search result.
typedef struct SingleInterModeState {
  int64_t rd;                    // rd cost of this candidate
  MV_REFERENCE_FRAME ref_frame;  // reference frame the candidate used
  int valid;                     // nonzero once this entry has been filled
} SingleInterModeState;

// Aggregated state carried through the inter-mode rd search: the running
// best candidate, per-reference statistics, and caches of intermediate
// results used to prune later candidates.
typedef struct InterModeSearchState {
  int64_t best_rd;           // best overall rd cost found so far
  int64_t best_skip_rd[2];
  MB_MODE_INFO best_mbmode;  // mode info of the current best candidate
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // nested intra search state
  RD_STATS best_y_rdcost;
} InterModeSearchState;
/*!\endcond */
348 
// Reset the per-block-size inter-mode rd model accumulators of a tile.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361 
// Estimate residue rate and distortion for a block of the given size from
// the per-bsize linear model. Returns 1 when the model is trained and the
// outputs were written, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  if (sse < model->dist_mean) {
    // Below the mean distortion the residue is assumed to cost nothing.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - model->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  // A non-positive rate estimate degenerates to "residue is free".
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395 
// Refit the per-block-size linear model ld ~= a * sse + b from the samples
// accumulated by inter_mode_data_push(), then restart accumulation.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Block sizes without a model slot are skipped.
    if (block_idx == -1) continue;
    // Require 200 samples for the initial fit, 64 for a refresh.
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        // First fit: means come straight from the accumulated sums.
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Refresh: exponential moving average, 3 parts history to 1 part
        // new data.
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      // Least-squares slope and intercept from first and second moments.
      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      // Restart accumulation for the next refresh window.
      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;  // rdmult is currently unused.
  }
}
445 
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447                                             BLOCK_SIZE bsize, int64_t sse,
448                                             int64_t dist, int residue_cost) {
449   if (residue_cost == 0 || sse == dist) return;
450   const int block_idx = inter_mode_data_block_idx(bsize);
451   if (block_idx == -1) return;
452   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454     const double ld = (sse - dist) * 1. / residue_cost;
455     ++rd_model->num;
456     rd_model->dist_sum += dist;
457     rd_model->ld_sum += ld;
458     rd_model->sse_sum += sse;
459     rd_model->sse_sse_sum += (double)sse * (double)sse;
460     rd_model->sse_ld_sum += sse * ld;
461   }
462 }
463 
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465                                              int mode_rate, int64_t sse,
466                                              int64_t rd, RD_STATS *rd_cost,
467                                              RD_STATS *rd_cost_y,
468                                              RD_STATS *rd_cost_uv,
469                                              const MB_MODE_INFO *mbmi) {
470   const int num = inter_modes_info->num;
471   assert(num < MAX_INTER_MODES);
472   inter_modes_info->mbmi_arr[num] = *mbmi;
473   inter_modes_info->mode_rate_arr[num] = mode_rate;
474   inter_modes_info->sse_arr[num] = sse;
475   inter_modes_info->est_rd_arr[num] = rd;
476   inter_modes_info->rd_cost_arr[num] = *rd_cost;
477   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479   ++inter_modes_info->num;
480 }
481 
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484     // To avoid inconsistency in qsort() ordering when two elements are equal,
485     // using idx as tie breaker. Refer aomedia:2928
486     if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487       return 0;
488     else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489       return 1;
490     else
491       return -1;
492   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493     return 1;
494   } else {
495     return -1;
496   }
497 }
498 
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500     const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501   if (inter_modes_info->num == 0) {
502     return;
503   }
504   for (int i = 0; i < inter_modes_info->num; ++i) {
505     rd_idx_pair_arr[i].idx = i;
506     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507   }
508   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509         compare_rd_idx_pair);
510 }
511 
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)514 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515                                        int width, int height, float *hcorr,
516                                        float *vcorr) {
517   // The following notation is used:
518   // x - current pixel
519   // y - left neighbor pixel
520   // z - top neighbor pixel
521   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524 
525   // First, process horizontal correlation on just the first row
526   x_sum += diff[0];
527   x2_sum += diff[0] * diff[0];
528   x_firstrow += diff[0];
529   x2_firstrow += diff[0] * diff[0];
530   for (int j = 1; j < width; ++j) {
531     const int16_t x = diff[j];
532     const int16_t y = diff[j - 1];
533     x_sum += x;
534     x_firstrow += x;
535     x2_sum += x * x;
536     x2_firstrow += x * x;
537     xy_sum += x * y;
538   }
539 
540   // Process vertical correlation in the first column
541   x_firstcol += diff[0];
542   x2_firstcol += diff[0] * diff[0];
543   for (int i = 1; i < height; ++i) {
544     const int16_t x = diff[i * stride];
545     const int16_t z = diff[(i - 1) * stride];
546     x_sum += x;
547     x_firstcol += x;
548     x2_sum += x * x;
549     x2_firstcol += x * x;
550     xz_sum += x * z;
551   }
552 
553   // Now process horiz and vert correlation through the rest unit
554   for (int i = 1; i < height; ++i) {
555     for (int j = 1; j < width; ++j) {
556       const int16_t x = diff[i * stride + j];
557       const int16_t y = diff[i * stride + j - 1];
558       const int16_t z = diff[(i - 1) * stride + j];
559       x_sum += x;
560       x2_sum += x * x;
561       xy_sum += x * y;
562       xz_sum += x * z;
563     }
564   }
565 
566   for (int j = 0; j < width; ++j) {
567     x_finalrow += diff[(height - 1) * stride + j];
568     x2_finalrow +=
569         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570   }
571   for (int i = 0; i < height; ++i) {
572     x_finalcol += diff[i * stride + width - 1];
573     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574   }
575 
576   int64_t xhor_sum = x_sum - x_finalcol;
577   int64_t xver_sum = x_sum - x_finalrow;
578   int64_t y_sum = x_sum - x_firstcol;
579   int64_t z_sum = x_sum - x_firstrow;
580   int64_t x2hor_sum = x2_sum - x2_finalcol;
581   int64_t x2ver_sum = x2_sum - x2_finalrow;
582   int64_t y2_sum = x2_sum - x2_firstcol;
583   int64_t z2_sum = x2_sum - x2_firstrow;
584 
585   const float num_hor = (float)(height * (width - 1));
586   const float num_ver = (float)((height - 1) * width);
587 
588   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590 
591   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593 
594   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596 
597   if (xhor_var_n > 0 && y_var_n > 0) {
598     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599     *hcorr = *hcorr < 0 ? 0 : *hcorr;
600   } else {
601     *hcorr = 1.0;
602   }
603   if (xver_var_n > 0 && z_var_n > 0) {
604     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605     *vcorr = *vcorr < 0 ? 0 : *vcorr;
606   } else {
607     *vcorr = 1.0;
608   }
609 }
610 
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)611 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
612                        int64_t *sse_y) {
613   const AV1_COMMON *cm = &cpi->common;
614   const int num_planes = av1_num_planes(cm);
615   const MACROBLOCKD *xd = &x->e_mbd;
616   const MB_MODE_INFO *mbmi = xd->mi[0];
617   int64_t total_sse = 0;
618   for (int plane = 0; plane < num_planes; ++plane) {
619     if (plane && !xd->is_chroma_ref) break;
620     const struct macroblock_plane *const p = &x->plane[plane];
621     const struct macroblockd_plane *const pd = &xd->plane[plane];
622     const BLOCK_SIZE bs =
623         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
624     unsigned int sse;
625 
626     cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
627                             pd->dst.stride, &sse);
628     total_sse += sse;
629     if (!plane && sse_y) *sse_y = sse;
630   }
631   total_sse <<= 4;
632   return total_sse;
633 }
634 
// Sum of squared differences between original and dequantized transform
// coefficients. *ssz receives the energy of the original coefficients.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0;
  int64_t sqcoeff = 0;

  for (int i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
649 
// Low-precision (16-bit coefficient) variant of av1_block_error_c:
// returns the squared error only, without the coefficient energy.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t total = 0;

  for (int i = 0; i < block_size; i++) {
    const int d = coeff[i] - dqcoeff[i];
    total += d * d;
  }

  return total;
}
661 
662 #if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth squared coefficient error; the error and the coefficient
// energy are rescaled back to the 8-bit domain with rounding, using a
// shift of 2 * (bd - 8) bits.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t error = 0;
  int64_t sqcoeff = 0;

  for (int i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
683 #endif
684 
// Returns 1 when a fine-grained directional intra mode can be skipped
// because neither of its two neighboring coarse directions was chosen as
// the best intra mode so far.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701 
// Rate cost of signalling the given inter prediction mode under its
// entropy context. Compound modes use a single symbol; single-reference
// modes are coded as a tree: NEWMV? -> GLOBALMV? -> NEARESTMV / NEARMV.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];
  int mode_cost = mode_costs->newmv_mode_cost[ctx][1];

  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV)
    return mode_cost + mode_costs->zeromv_mode_cost[ctx][0];
  mode_cost += mode_costs->zeromv_mode_cost[ctx][1];

  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  mode_cost += mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
  return mode_cost;
}
732 
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734                                               int ref_idx) {
735   return ref_idx ? compound_ref1_mode(this_mode)
736                  : compound_ref0_mode(this_mode);
737 }
738 
estimate_ref_frame_costs(const AV1_COMMON * cm,const MACROBLOCKD * xd,const ModeCosts * mode_costs,int segment_id,unsigned int * ref_costs_single,unsigned int (* ref_costs_comp)[REF_FRAMES])739 static AOM_INLINE void estimate_ref_frame_costs(
740     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
741     int segment_id, unsigned int *ref_costs_single,
742     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
743   int seg_ref_active =
744       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
745   if (seg_ref_active) {
746     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
747     int ref_frame;
748     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
749       memset(ref_costs_comp[ref_frame], 0,
750              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
751   } else {
752     int intra_inter_ctx = av1_get_intra_inter_context(xd);
753     ref_costs_single[INTRA_FRAME] =
754         mode_costs->intra_inter_cost[intra_inter_ctx][0];
755     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
756 
757     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
758       ref_costs_single[i] = base_cost;
759 
760     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
761     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
762     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
763     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
764     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
765     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
766 
767     // Determine cost of a single ref frame, where frame types are represented
768     // by a tree:
769     // Level 0: add cost whether this ref is a forward or backward ref
770     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
773     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
774     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
775     ref_costs_single[ALTREF2_FRAME] +=
776         mode_costs->single_ref_cost[ctx_p1][0][1];
777     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
778 
779     // Level 1: if this ref is forward ref,
780     // add cost whether it is last/last2 or last3/golden
781     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
782     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
783     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
784     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
785 
786     // Level 1: if this ref is backward ref
787     // then add cost whether this ref is altref or backward ref
788     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
789     ref_costs_single[ALTREF2_FRAME] +=
790         mode_costs->single_ref_cost[ctx_p2][1][0];
791     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
792 
793     // Level 2: further add cost whether this ref is last or last2
794     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
795     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
796 
797     // Level 2: last3 or golden
798     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
799     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
800 
801     // Level 2: bwdref or altref2
802     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
803     ref_costs_single[ALTREF2_FRAME] +=
804         mode_costs->single_ref_cost[ctx_p6][5][1];
805 
806     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
807       // Similar to single ref, determine cost of compound ref frames.
808       // cost_compound_refs = cost_first_ref + cost_second_ref
809       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
810       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
811       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
812       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
813       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
814 
815       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
816       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
817 
818       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
819           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
820               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
821       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
822       ref_bicomp_costs[ALTREF_FRAME] = 0;
823 
824       // cost of first ref frame
825       ref_bicomp_costs[LAST_FRAME] +=
826           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827       ref_bicomp_costs[LAST2_FRAME] +=
828           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
829       ref_bicomp_costs[LAST3_FRAME] +=
830           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831       ref_bicomp_costs[GOLDEN_FRAME] +=
832           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
833 
834       ref_bicomp_costs[LAST_FRAME] +=
835           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
836       ref_bicomp_costs[LAST2_FRAME] +=
837           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
838 
839       ref_bicomp_costs[LAST3_FRAME] +=
840           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
841       ref_bicomp_costs[GOLDEN_FRAME] +=
842           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
843 
844       // cost of second ref frame
845       ref_bicomp_costs[BWDREF_FRAME] +=
846           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847       ref_bicomp_costs[ALTREF2_FRAME] +=
848           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
849       ref_bicomp_costs[ALTREF_FRAME] +=
850           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
851 
852       ref_bicomp_costs[BWDREF_FRAME] +=
853           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
854       ref_bicomp_costs[ALTREF2_FRAME] +=
855           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
856 
857       // cost: if one ref frame is forward ref, the other ref is backward ref
858       int ref0, ref1;
859       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
860         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
861           ref_costs_comp[ref0][ref1] =
862               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
863         }
864       }
865 
866       // cost: if both ref frames are the same side.
867       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
868       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
869       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
870       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
871           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
872           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
873           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
874       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
875           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
876           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
877           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
878           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
879       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
880           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
881           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
882           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
883           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
884       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
885           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
886           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
887     } else {
888       int ref0, ref1;
889       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
890         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
891           ref_costs_comp[ref0][ref1] = 512;
892       }
893       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
894       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
895       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
896       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
897     }
898   }
899 }
900 
// Snapshots the current coding decisions into ctx (skip flags, the winning
// MB_MODE_INFO and the MV-reference side info) so they can be restored later
// if this coding path is ultimately chosen for the block.
static AOM_INLINE void store_coding_context(
#if CONFIG_INTERNAL_STATS
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
#else
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
#endif  // CONFIG_INTERNAL_STATS
    int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
  ctx->skippable = skippable;
#if CONFIG_INTERNAL_STATS
  // The winning mode index is only tracked for internal statistics builds.
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  // Copy only the ref-MV stack entries relevant to the chosen reference type.
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}
921 
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])922 static AOM_INLINE void setup_buffer_ref_mvs_inter(
923     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
924     BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
925   const AV1_COMMON *cm = &cpi->common;
926   const int num_planes = av1_num_planes(cm);
927   const YV12_BUFFER_CONFIG *scaled_ref_frame =
928       av1_get_scaled_ref_frame(cpi, ref_frame);
929   MACROBLOCKD *const xd = &x->e_mbd;
930   MB_MODE_INFO *const mbmi = xd->mi[0];
931   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
932   const struct scale_factors *const sf =
933       get_ref_scale_factors_const(cm, ref_frame);
934   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
935   assert(yv12 != NULL);
936 
937   if (scaled_ref_frame) {
938     // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
939     // support scaling.
940     av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
941                          num_planes);
942   } else {
943     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
944   }
945 
946   // Gets an initial list of candidate vectors from neighbours and orders them
947   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
948                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
949                    mbmi_ext->mode_context);
950   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
951   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
952   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
953   // Further refinement that is encode side only to test the top few candidates
954   // in full and choose the best as the center point for subsequent searches.
955   // The current implementation doesn't support scaling.
956   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
957               ref_frame, block_size);
958 
959   // Go back to unscaled reference.
960   if (scaled_ref_frame) {
961     // We had temporarily setup pred block based on scaled reference above. Go
962     // back to unscaled reference now, for subsequent use.
963     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
964   }
965 }
966 
967 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969 
970 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)971 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972   const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973                                      xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974                                      xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975                                      xd->mb_to_bottom_edge +
976                                          RIGHT_BOTTOM_MARGIN };
977   clamp_mv(mv, &mv_limits);
978 }
979 
980 /* If the current mode shares the same mv with other modes with higher cost,
981  * skip this mode. */
skip_repeated_mv(const AV1_COMMON * const cm,const MACROBLOCK * const x,PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frames[2],InterModeSearchState * search_state)982 static int skip_repeated_mv(const AV1_COMMON *const cm,
983                             const MACROBLOCK *const x,
984                             PREDICTION_MODE this_mode,
985                             const MV_REFERENCE_FRAME ref_frames[2],
986                             InterModeSearchState *search_state) {
987   const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
988   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
989   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
990   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
991   PREDICTION_MODE compare_mode = MB_MODE_COUNT;
992   if (!is_comp_pred) {
993     if (this_mode == NEARMV) {
994       if (ref_mv_count == 0) {
995         // NEARMV has the same motion vector as NEARESTMV
996         compare_mode = NEARESTMV;
997       }
998       if (ref_mv_count == 1 &&
999           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1000         // NEARMV has the same motion vector as GLOBALMV
1001         compare_mode = GLOBALMV;
1002       }
1003     }
1004     if (this_mode == GLOBALMV) {
1005       if (ref_mv_count == 0 &&
1006           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1007         // GLOBALMV has the same motion vector as NEARESTMV
1008         compare_mode = NEARESTMV;
1009       }
1010       if (ref_mv_count == 1) {
1011         // GLOBALMV has the same motion vector as NEARMV
1012         compare_mode = NEARMV;
1013       }
1014     }
1015 
1016     if (compare_mode != MB_MODE_COUNT) {
1017       // Use modelled_rd to check whether compare mode was searched
1018       if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1019           INT64_MAX) {
1020         const int16_t mode_ctx =
1021             av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1022         const int compare_cost =
1023             cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1024         const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1025 
1026         // Only skip if the mode cost is larger than compare mode cost
1027         if (this_cost > compare_cost) {
1028           search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1029               search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1030           return 1;
1031         }
1032       }
1033     }
1034   }
1035   return 0;
1036 }
1037 
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039                                      const AV1_COMMON *cm,
1040                                      const MACROBLOCK *x) {
1041   const MACROBLOCKD *const xd = &x->e_mbd;
1042   *out_mv = in_mv;
1043   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044                      cm->features.cur_frame_force_integer_mv);
1045   clamp_mv2(&out_mv->as_mv, xd);
1046   return av1_is_fullmv_in_range(&x->mv_limits,
1047                                 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049 
1050 // To use single newmv directly for compound modes, need to clamp the mv to the
1051 // valid mv range. Without this, encoder would generate out of range mv, and
1052 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1053 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054                                      int ref_idx) {
1055   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056   SubpelMvLimits mv_limits;
1057 
1058   av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059   clamp_mv(&mv->as_mv, &mv_limits);
1060 }
1061 
/*!\brief Motion search for inter modes with a NEWMV component.
 *
 * For compound modes, reuses the cached single-reference NEWMV results in
 * \c args (after clamping to the valid range) when available, and recomputes
 * the MV signaling cost. For single-reference NEWMV, runs a new motion
 * search and caches the result in \c args so later compound modes can reuse
 * it.
 *
 * Returns 0 on success; INT64_MAX if this mode should be skipped (the search
 * produced an invalid MV, or mode_info flags this ref_mv_idx to be skipped).
 */
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // refs[1] maps a missing second reference (negative) to index 0.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Whether a single-reference NEWMV result was cached for each side.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: take each cached single-ref result if available.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Re-cost both MVs against their respective reference MVs.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second MV is NEW; only it needs the cached result and cost.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first MV is NEW (NEW_NEARESTMV / NEW_NEARMV).
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: for non-zero ref_mv_idx, bound the search range by how
    // far the closest previously-searched reference MV and its NEWMV result
    // ended up from this index's reference MV.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max of row/col) distance between the reference MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when a close match (< 16 pels) exists and its
      // NEWMV result is cached.
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result for reuse by compound modes with the same ref_mv_idx.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1166 
update_mode_start_end_index(const AV1_COMP * const cpi,const MB_MODE_INFO * const mbmi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1167 static INLINE void update_mode_start_end_index(
1168     const AV1_COMP *const cpi, const MB_MODE_INFO *const mbmi,
1169     int *mode_index_start, int *mode_index_end, int last_motion_mode_allowed,
1170     int interintra_allowed, int eval_motion_mode) {
1171   *mode_index_start = (int)SIMPLE_TRANSLATION;
1172   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1173   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1174     if (!eval_motion_mode) {
1175       *mode_index_end = (int)SIMPLE_TRANSLATION;
1176     } else {
1177       // Set the start index appropriately to process motion modes other than
1178       // simple translation
1179       *mode_index_start = 1;
1180     }
1181   }
1182   if (cpi->sf.inter_sf.extra_prune_warped && mbmi->bsize > BLOCK_16X16)
1183     *mode_index_end = SIMPLE_TRANSLATION;
1184 }
1185 
1186 /*!\brief AV1 motion mode search
1187  *
1188  * \ingroup inter_mode_search
1189  * Function to search over and determine the motion mode. It will update
1190  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1191  * WARPED_CAUSAL and determine any necessary side information for the selected
1192  * motion mode. It will also perform the full transform search, unless the
1193  * input parameter do_tx_search indicates to do an estimation of the RD rather
1194  * than an RD corresponding to a full transform search. It will return the
1195  * RD for the final motion_mode.
1196  * Do the RD search for a given inter mode and compute all information relevant
1197  * to the input mode. It will compute the best MV,
1198  * compound parameters (if the mode is a compound mode) and interpolation filter
1199  * parameters.
1200  *
1201  * \param[in]     cpi               Top-level encoder structure.
1202  * \param[in]     tile_data         Pointer to struct holding adaptive
1203  *                                  data/contexts/models for the tile during
1204  *                                  encoding.
1205  * \param[in]     x                 Pointer to struct holding all the data for
1206  *                                  the current macroblock.
1207  * \param[in]     bsize             Current block size.
1208  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1209  *                                  information.
1210  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1211  *                                  for only the Y plane.
1212  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1213  *                                  for only the UV planes.
1214  * \param[in]     args              HandleInterModeArgs struct holding
1215  *                                  miscellaneous arguments for inter mode
1216  *                                  search. See the documentation for this
1217  *                                  struct for a description of each member.
1218  * \param[in]     ref_best_rd       Best RD found so far for this block.
1219  *                                  It is used for early termination of this
1220  *                                  search if the RD exceeds this value.
1221  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1222  *                                  best total RD for a skip mode so far, and
1223  *                                  skip_rd[1] is the best RD for a skip mode so
1224  *                                  far in luma. This is used as a speed feature
1225  *                                  to skip the transform search if the computed
1226  *                                  skip RD for the current mode is not better
1227  *                                  than the best skip_rd so far.
1228  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1229  *                                  This will be modified if a motion search is
1230  *                                  done in the motion mode search.
1231  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1232  *                                  prediction. This will eventually hold the
1233  *                                  final prediction, and the tmp_dst info will
1234  *                                  be copied here.
1235  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1236  *                                  do_tx_search (see below) is 0.
1237  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1238  *                                  a full transform search. This will compute
1239  *                                  an estimated RD for the modes without the
1240  *                                  transform search and later perform the full
1241  *                                  transform search on the best candidates.
1242  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1243  *                                  information to perform a full transform
1244  *                                  search only on winning candidates searched
1245  *                                  with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1248  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1249  *                                  the luma plane.
1250  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1251  * current motion mode being tested should be skipped. It returns 0 if the
1252  * motion mode search is a success.
1253  */
motion_mode_rd(const AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * const x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * const args,int64_t ref_best_rd,int64_t * ref_skip_rd,int * rate_mv,const BUFFER_SET * orig_dst,int64_t * best_est_rd,int do_tx_search,InterModesInfo * inter_modes_info,int eval_motion_mode,int64_t * yrd)1254 static int64_t motion_mode_rd(
1255     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1256     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1257     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1258     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1259     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1260     int eval_motion_mode, int64_t *yrd) {
1261   const AV1_COMMON *const cm = &cpi->common;
1262   const FeatureFlags *const features = &cm->features;
1263   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1264   const int num_planes = av1_num_planes(cm);
1265   MACROBLOCKD *xd = &x->e_mbd;
1266   MB_MODE_INFO *mbmi = xd->mi[0];
1267   const int is_comp_pred = has_second_ref(mbmi);
1268   const PREDICTION_MODE this_mode = mbmi->mode;
1269   const int rate2_nocoeff = rd_stats->rate;
1270   int best_xskip_txfm = 0;
1271   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1272   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1273   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1274   const int rate_mv0 = *rate_mv;
1275   const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1276                                  is_interintra_allowed(mbmi) &&
1277                                  mbmi->compound_idx;
1278   WARP_SAMPLE_INFO *const warp_sample_info =
1279       &x->warp_sample_info[mbmi->ref_frame[0]];
1280   int *pts0 = warp_sample_info->pts;
1281   int *pts_inref0 = warp_sample_info->pts_inref;
1282 
1283   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1284   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1285   av1_invalid_rd_stats(&best_rd_stats);
1286   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1287   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1288   *yrd = INT64_MAX;
1289   if (features->switchable_motion_mode) {
1290     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1291     // is allowed.
1292     last_motion_mode_allowed = motion_mode_allowed(
1293         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1294   }
1295 
1296   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1297     // Collect projection samples used in least squares approximation of
1298     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1299     if (warp_sample_info->num < 0) {
1300       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1301     }
1302     mbmi->num_proj_ref = warp_sample_info->num;
1303   }
1304   const int total_samples = mbmi->num_proj_ref;
1305   if (total_samples == 0) {
1306     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1307     // warped parameters.
1308     last_motion_mode_allowed = OBMC_CAUSAL;
1309   }
1310 
1311   const MB_MODE_INFO base_mbmi = *mbmi;
1312   MB_MODE_INFO best_mbmi;
1313   const int interp_filter = features->interp_filter;
1314   const int switchable_rate =
1315       av1_is_interp_needed(xd)
1316           ? av1_get_switchable_rate(x, xd, interp_filter,
1317                                     cm->seq_params->enable_dual_filter)
1318           : 0;
1319   int64_t best_rd = INT64_MAX;
1320   int best_rate_mv = rate_mv0;
1321   const int mi_row = xd->mi_row;
1322   const int mi_col = xd->mi_col;
1323   int mode_index_start, mode_index_end;
1324   const int txfm_rd_gate_level =
1325       get_txfm_rd_gate_level(cm->seq_params->enable_masked_compound,
1326                              cpi->sf.inter_sf.txfm_rd_gate_level, bsize,
1327                              TX_SEARCH_MOTION_MODE, eval_motion_mode);
1328 
1329   // Modify the start and end index according to speed features. For example,
1330   // if SIMPLE_TRANSLATION has already been searched according to
1331   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1332   // to avoid searching it again.
1333   update_mode_start_end_index(cpi, mbmi, &mode_index_start, &mode_index_end,
1334                               last_motion_mode_allowed, interintra_allowed,
1335                               eval_motion_mode);
1336   // Main function loop. This loops over all of the possible motion modes and
1337   // computes RD to determine the best one. This process includes computing
1338   // any necessary side information for the motion mode and performing the
1339   // transform search.
1340   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1341        mode_index++) {
1342     if (args->skip_motion_mode && mode_index) continue;
1343     int tmp_rate2 = rate2_nocoeff;
1344     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1345     int tmp_rate_mv = rate_mv0;
1346 
1347     *mbmi = base_mbmi;
1348     if (is_interintra_mode) {
1349       // Only use SIMPLE_TRANSLATION for interintra
1350       mbmi->motion_mode = SIMPLE_TRANSLATION;
1351     } else {
1352       mbmi->motion_mode = (MOTION_MODE)mode_index;
1353       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1354     }
1355 
1356     // Do not search OBMC if the probability of selecting it is below a
1357     // predetermined threshold for this update_type and block size.
1358     const FRAME_UPDATE_TYPE update_type =
1359         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1360     int use_actual_frame_probs = 1;
1361     int prune_obmc;
1362 #if CONFIG_FPMT_TEST
1363     use_actual_frame_probs =
1364         (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
1365     if (!use_actual_frame_probs) {
1366       prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
1367                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1368     }
1369 #endif
1370     if (use_actual_frame_probs) {
1371       prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1372                    cpi->sf.inter_sf.prune_obmc_prob_thresh;
1373     }
1374     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1375         mbmi->motion_mode == OBMC_CAUSAL)
1376       continue;
1377 
1378     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1379       // SIMPLE_TRANSLATION mode: no need to recalculate.
1380       // The prediction is calculated before motion_mode_rd() is called in
1381       // handle_inter_mode()
1382     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1383       const uint32_t cur_mv = mbmi->mv[0].as_int;
1384       // OBMC_CAUSAL not allowed for compound prediction
1385       assert(!is_comp_pred);
1386       if (have_newmv_in_inter_mode(this_mode)) {
1387         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1388                                  &mbmi->mv[0], NULL);
1389         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1390       }
1391       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1392         // Build the predictor according to the current motion vector if it has
1393         // not already been built
1394         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1395                                       0, av1_num_planes(cm) - 1);
1396       }
1397       // Build the inter predictor by blending the predictor corresponding to
1398       // this MV, and the neighboring blocks using the OBMC model
1399       av1_build_obmc_inter_prediction(
1400           cm, xd, args->above_pred_buf, args->above_pred_stride,
1401           args->left_pred_buf, args->left_pred_stride);
1402 #if !CONFIG_REALTIME_ONLY
1403     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1404       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1405       mbmi->motion_mode = WARPED_CAUSAL;
1406       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1407       mbmi->interp_filters =
1408           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1409 
1410       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1411       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1412       // Select the samples according to motion vector difference
1413       if (mbmi->num_proj_ref > 1) {
1414         mbmi->num_proj_ref = av1_selectSamples(
1415             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1416       }
1417 
1418       // Compute the warped motion parameters with a least squares fit
1419       //  using the collected samples
1420       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1421                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1422                                &mbmi->wm_params, mi_row, mi_col)) {
1423         assert(!is_comp_pred);
1424         if (have_newmv_in_inter_mode(this_mode)) {
1425           // Refine MV for NEWMV mode
1426           const int_mv mv0 = mbmi->mv[0];
1427           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1428           const int num_proj_ref0 = mbmi->num_proj_ref;
1429 
1430           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1431           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1432           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1433                                             &ref_mv.as_mv, NULL);
1434 
1435           // Refine MV in a small range.
1436           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1437                                total_samples, cpi->sf.mv_sf.warp_search_method,
1438                                cpi->sf.mv_sf.warp_search_iters);
1439 
1440           if (mv0.as_int != mbmi->mv[0].as_int) {
1441             // Keep the refined MV and WM parameters.
1442             tmp_rate_mv = av1_mv_bit_cost(
1443                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1444                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1445             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1446           } else {
1447             // Restore the old MV and WM parameters.
1448             mbmi->mv[0] = mv0;
1449             mbmi->wm_params = wm_params0;
1450             mbmi->num_proj_ref = num_proj_ref0;
1451           }
1452         }
1453 
1454         // Build the warped predictor
1455         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1456                                       av1_num_planes(cm) - 1);
1457       } else {
1458         continue;
1459       }
1460 #endif  // !CONFIG_REALTIME_ONLY
1461     } else if (is_interintra_mode) {
1462       const int ret =
1463           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1464                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1465       if (ret < 0) continue;
1466     }
1467 
1468     // If we are searching newmv and the mv is the same as refmv, skip the
1469     // current mode
1470     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1471 
1472     // Update rd_stats for the current motion mode
1473     txfm_info->skip_txfm = 0;
1474     rd_stats->dist = 0;
1475     rd_stats->sse = 0;
1476     rd_stats->skip_txfm = 1;
1477     rd_stats->rate = tmp_rate2;
1478     const ModeCosts *mode_costs = &x->mode_costs;
1479     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1480     if (interintra_allowed) {
1481       rd_stats->rate +=
1482           mode_costs->interintra_cost[size_group_lookup[bsize]]
1483                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1484     }
1485     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1486         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1487       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1488         rd_stats->rate +=
1489             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1490       } else {
1491         rd_stats->rate +=
1492             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1493       }
1494     }
1495 
1496     int64_t this_yrd = INT64_MAX;
1497 
1498     if (!do_tx_search) {
1499       // Avoid doing a transform search here to speed up the overall mode
1500       // search. It will be done later in the mode search if the current
1501       // motion mode seems promising.
1502       int64_t curr_sse = -1;
1503       int64_t sse_y = -1;
1504       int est_residue_cost = 0;
1505       int64_t est_dist = 0;
1506       int64_t est_rd = 0;
1507       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1508         curr_sse = get_sse(cpi, x, &sse_y);
1509         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1510                                                  &est_residue_cost, &est_dist);
1511         (void)has_est_rd;
1512         assert(has_est_rd);
1513       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1514                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1515         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1516             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1517             NULL, &curr_sse, NULL, NULL, NULL);
1518         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1519       }
1520       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1521       if (est_rd * 0.80 > *best_est_rd) {
1522         mbmi->ref_frame[1] = ref_frame_1;
1523         continue;
1524       }
1525       const int mode_rate = rd_stats->rate;
1526       rd_stats->rate += est_residue_cost;
1527       rd_stats->dist = est_dist;
1528       rd_stats->rdcost = est_rd;
1529       if (rd_stats->rdcost < *best_est_rd) {
1530         *best_est_rd = rd_stats->rdcost;
1531         assert(sse_y >= 0);
1532         ref_skip_rd[1] = txfm_rd_gate_level
1533                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1534                              : INT64_MAX;
1535       }
1536       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1537         if (!is_comp_pred) {
1538           assert(curr_sse >= 0);
1539           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1540                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1541                                 rd_stats_uv, mbmi);
1542         }
1543       } else {
1544         assert(curr_sse >= 0);
1545         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1546                               rd_stats->rdcost, rd_stats, rd_stats_y,
1547                               rd_stats_uv, mbmi);
1548       }
1549       mbmi->skip_txfm = 0;
1550     } else {
1551       // Perform full transform search
1552       int64_t skip_rd = INT64_MAX;
1553       int64_t skip_rdy = INT64_MAX;
1554       if (txfm_rd_gate_level) {
1555         // Check if the mode is good enough based on skip RD
1556         int64_t sse_y = INT64_MAX;
1557         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1558         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1559         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1560         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1561                                         txfm_rd_gate_level, 0);
1562         if (!eval_txfm) continue;
1563       }
1564 
1565       // Do transform search
1566       const int mode_rate = rd_stats->rate;
1567       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1568                            rd_stats->rate, ref_best_rd)) {
1569         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1570           return INT64_MAX;
1571         }
1572         continue;
1573       }
1574       const int skip_ctx = av1_get_skip_txfm_context(xd);
1575       const int y_rate =
1576           rd_stats->skip_txfm
1577               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1578               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1579       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1580 
1581       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1582       if (curr_rd < ref_best_rd) {
1583         ref_best_rd = curr_rd;
1584         ref_skip_rd[0] = skip_rd;
1585         ref_skip_rd[1] = skip_rdy;
1586       }
1587       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1588         inter_mode_data_push(
1589             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1590             rd_stats_y->rate + rd_stats_uv->rate +
1591                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1592       }
1593     }
1594 
1595     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1596       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1597         mbmi->interp_filters =
1598             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1599       }
1600     }
1601 
1602     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1603     if (mode_index == 0) {
1604       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1605     }
1606     if (mode_index == 0 || tmp_rd < best_rd) {
1607       // Update best_rd data if this is the best motion mode so far
1608       best_mbmi = *mbmi;
1609       best_rd = tmp_rd;
1610       best_rd_stats = *rd_stats;
1611       best_rd_stats_y = *rd_stats_y;
1612       best_rate_mv = tmp_rate_mv;
1613       *yrd = this_yrd;
1614       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1615       memcpy(best_blk_skip, txfm_info->blk_skip,
1616              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1617       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1618       best_xskip_txfm = mbmi->skip_txfm;
1619     }
1620   }
1621   // Update RD and mbmi stats for selected motion mode
1622   mbmi->ref_frame[1] = ref_frame_1;
1623   *rate_mv = best_rate_mv;
1624   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1625     av1_invalid_rd_stats(rd_stats);
1626     restore_dst_buf(xd, *orig_dst, num_planes);
1627     return INT64_MAX;
1628   }
1629   *mbmi = best_mbmi;
1630   *rd_stats = best_rd_stats;
1631   *rd_stats_y = best_rd_stats_y;
1632   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1633   memcpy(txfm_info->blk_skip, best_blk_skip,
1634          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1635   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1636   txfm_info->skip_txfm = best_xskip_txfm;
1637 
1638   restore_dst_buf(xd, *orig_dst, num_planes);
1639   return 0;
1640 }
1641 
// Estimate the RD cost of coding this block in skip mode: only the
// skip-mode flag is transmitted and the prediction residual becomes the
// distortion. Writes rate/dist/sse/rdcost into rd_stats and returns 0.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst, int64_t best_rd) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  int64_t dist_sum = 0;
  int64_t rd_so_far = INT64_MAX;

  // Skip mode signals no residual: the rate is just the flag's cost.
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];

  for (int plane = 0; plane < num_planes; ++plane) {
    // Build the inter predictor one plane at a time so that evaluation can
    // stop early once the accumulated cost exceeds best_rd.
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                  plane, plane);
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    av1_subtract_plane(x, plane_bsize, plane);

    int64_t plane_sse =
        av1_pixel_diff_dist(x, plane, 0, 0, plane_bsize, plane_bsize, NULL);
    // Normalize high-bit-depth distortion back to an 8-bit scale.
    if (is_cur_buf_hbd(xd))
      plane_sse = ROUND_POWER_OF_TWO(plane_sse, (xd->bd - 8) * 2);
    dist_sum += plane_sse << 4;

    // Once the running RD cost is already worse than the best so far, skip
    // evaluating the remaining planes.
    rd_so_far = RDCOST(x->rdmult, rd_stats->rate, dist_sum);
    if (rd_so_far > best_rd) break;
  }

  rd_stats->dist = rd_stats->sse = dist_sum;
  rd_stats->rdcost = rd_so_far;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1683 
1684 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1685 // mode
1686 // Note(rachelbarker): This speed feature currently does not interact correctly
1687 // with global motion. The issue is that, when global motion is used, GLOBALMV
1688 // produces a different prediction to NEARESTMV/NEARMV even if the motion
1689 // vectors are the same. Thus GLOBALMV should not be pruned in this case.
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1690 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1691                                       int ref_idx,
1692                                       const MV_REFERENCE_FRAME *ref_frame,
1693                                       PREDICTION_MODE single_mode) {
1694   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1695   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1696   assert(single_mode != NEWMV);
1697   if (single_mode == NEARESTMV) {
1698     return 0;
1699   } else if (single_mode == NEARMV) {
1700     // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1701     // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1702     if (ref_mv_count < 2) return 1;
1703   } else if (single_mode == GLOBALMV) {
1704     // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1705     if (ref_mv_count == 0) return 1;
1706     // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1707     else if (ref_mv_count == 1)
1708       return 0;
1709 
1710     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1711     // Check GLOBALMV is matching with any mv in ref_mv_stack
1712     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1713       int_mv this_mv;
1714 
1715       if (ref_idx == 0)
1716         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1717       else
1718         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1719 
1720       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1721         return 1;
1722     }
1723   }
1724   return 0;
1725 }
1726 
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1727 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1728                               int ref_idx, int ref_mv_idx,
1729                               int skip_repeated_ref_mv,
1730                               const MV_REFERENCE_FRAME *ref_frame,
1731                               const MB_MODE_INFO_EXT *mbmi_ext) {
1732   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1733   assert(is_inter_singleref_mode(single_mode));
1734   if (single_mode == NEWMV) {
1735     this_mv->as_int = INVALID_MV;
1736   } else if (single_mode == GLOBALMV) {
1737     if (skip_repeated_ref_mv &&
1738         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1739       return 0;
1740     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1741   } else {
1742     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1743     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1744     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1745     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1746       assert(ref_mv_offset >= 0);
1747       if (ref_idx == 0) {
1748         *this_mv =
1749             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1750       } else {
1751         *this_mv =
1752             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1753       }
1754     } else {
1755       if (skip_repeated_ref_mv &&
1756           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1757         return 0;
1758       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1759     }
1760   }
1761   return 1;
1762 }
1763 
1764 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1765 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type,PREDICTION_MODE best_mode)1766 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1767     const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1768     const int8_t ref_frame_type, PREDICTION_MODE best_mode) {
1769   if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1770   // Do not skip the mode if the current block has not yet obtained a valid
1771   // inter mode.
1772   if (!is_inter_mode(best_mode)) return 0;
1773 
1774   const MACROBLOCKD *xd = &x->e_mbd;
1775   // Do not skip the mode if both the top and left neighboring blocks are not
1776   // available.
1777   if (!xd->left_available || !xd->up_available) return 0;
1778   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1779   const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1780   const int ref_mv_count =
1781       AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1782 
1783   if (ref_mv_count == 0) return 0;
1784   // If ref mv list has at least one nearest candidate do not prune NEARESTMV
1785   if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1786 
1787   // Count number of ref mvs populated from nearest candidates
1788   int nearest_refmv_count = 0;
1789   for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1790     if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1791   }
1792 
1793   // nearest_refmv_count indicates the closeness of block motion characteristics
1794   // with respect to its spatial neighbor. Smaller value of nearest_refmv_count
1795   // w.r.t to ref_mv_count means less correlation with its spatial neighbors.
1796   // Hence less possibility for NEARESTMV and NEARMV modes becoming the best
1797   // mode since these modes work well for blocks that shares similar motion
1798   // characteristics with its neighbor. Thus, NEARMV mode is pruned when
1799   // nearest_refmv_count is relatively smaller than ref_mv_count and NEARESTMV
1800   // mode is pruned if none of the ref mvs are populated from nearest candidate.
1801   const int prune_thresh = 1 + (ref_mv_count >= 2);
1802   if (nearest_refmv_count < prune_thresh) return 1;
1803   return 0;
1804 }
1805 
// Populates cur_mv[] (one entry per reference) for the current prediction
// mode. Returns 0 when a repeated/out-of-range ref mv means the mode should
// be skipped, 1 on success.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  // One mv per reference frame: two iterations for compound prediction.
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // For a NEWMV component, store the ref mv stack entry; the actual mv
      // comes from the subsequent motion search.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Non-NEWMV: clamp the candidate to the allowed range and reject the
      // mode if the clamped mv is unusable.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
1835 
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1836 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1837                                const MB_MODE_INFO_EXT *mbmi_ext,
1838                                const int (*const drl_mode_cost0)[2],
1839                                int8_t ref_frame_type) {
1840   int cost = 0;
1841   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1842     for (int idx = 0; idx < 2; ++idx) {
1843       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1844         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1845         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1846         if (mbmi->ref_mv_idx == idx) return cost;
1847       }
1848     }
1849     return cost;
1850   }
1851 
1852   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1853     for (int idx = 1; idx < 3; ++idx) {
1854       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1855         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1856         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1857         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1858       }
1859     }
1860     return cost;
1861   }
1862   return cost;
1863 }
1864 
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1865 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1866                                         const MB_MODE_INFO *const mbmi,
1867                                         PREDICTION_MODE this_mode) {
1868   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1869     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1870     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1871     if (single_mode == NEWMV &&
1872         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1873       return 0;
1874     }
1875   }
1876   return 1;
1877 }
1878 
// Number of ref mv indices to evaluate for the given mode.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  // NEARMV-style modes skip the first stack entry; NEWMV-style modes index
  // the stack directly.
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
  // A DRL index is only signaled when there are enough candidates to choose
  // from; otherwise exactly one ref mv is evaluated.
  if ((has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1))
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
  return 1;
}
1894 
1895 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1896 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1897                                          const int qindex,
1898                                          const int ref_mv_idx) {
1899   if (reduce_inter_modes >= 3) return 1;
1900   // Q-index logic based pruning is enabled only for
1901   // reduce_inter_modes = 2.
1902   assert(reduce_inter_modes == 2);
1903   // When reduce_inter_modes=2, pruning happens as below based on q index.
1904   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1905   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1906   // For q index range between 171 and 255: no pruning.
1907   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1908   return (ref_mv_idx >= min_prune_ref_mv_idx);
1909 }
1910 
// Whether this reference motion vector can be skipped, based on initial
// heuristics. Returns true to skip the candidate. Also sets
// mbmi->ref_mv_idx as a side effect when the early weight-based checks pass.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Prune low-weight candidates when either reference is LAST2/LAST3.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    // At level >= 2, also prune single-ref NEWMV candidates pointing at
    // references that are not the temporally nearest ones, gated by q-index.
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound modes require every NEWMV component to have a valid single mv.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Estimate a rate lower bound (ref frame + comp signaling + DRL bits); if
  // even that already exceeds ref_best_rd, skip — except for the NEARESTMV
  // family, which is always kept as a baseline.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1965 
// Compute the estimated RD cost for the motion vector with simple
// translation (no OBMC/warp, compound average only), using a model-based
// rate/distortion estimate on the luma plane. Returns the estimated RD cost,
// or INT64_MAX when the candidate can be rejected early.
// NOTE(review): mutates mbmi (mode fields, mv, ref_mv_idx, filters) and
// builds a luma predictor into the current dst buffers; callers are expected
// to restore any state they need afterwards — confirm at call sites.
static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                          RD_STATS *rd_stats,
                                          HandleInterModeArgs *args,
                                          int ref_mv_idx, int64_t ref_best_rd,
                                          BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the current destination buffers to build the predictor into.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Force the simplest compound configuration for the estimate.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Accumulate the signaling rate: ref frame + comp mode + DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit: the rate alone already exceeds the best RD cost so far.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Build only the luma predictor and estimate residual rate/distortion with
  // the curve-fit model instead of a full transform search.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
2040 
// Represents a set of small integers (valid indices are 0 to
// sizeof(int) * 8 - 1) as bits in an integer: a 0 in the i-th bit means the
// integer i is excluded, a 1 means it is included.
mask_set_bit(int * mask,int index)2044 static INLINE void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2045 
mask_check_bit(int mask,int index)2046 static INLINE bool mask_check_bit(int mask, int index) {
2047   return (mask >> index) & 0x1;
2048 }
2049 
2050 // Before performing the full MV search in handle_inter_mode, do a simple
2051 // translation search and see if we can eliminate any motion vectors.
2052 // Returns an integer where, if the i-th bit is set, it means that the i-th
2053 // motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  // If the number of ref mv count is equal to 1, do not prune the same. It
  // is better to evaluate the same than to prune it.
  if (ref_set == 1) return 1;
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  // Skip pruning for blocks of 2^6 = 64 pixels or fewer.
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // Entries for indices that were pruned above stay at INT64_MAX.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // A looser tolerance is used for compound prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2124 
2125 /*!\brief Motion mode information for inter mode search speedup.
2126  *
2127  * Used in a speed feature to search motion modes other than
2128  * SIMPLE_TRANSLATION only on winning candidates.
2129  */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * Flag (0 or 1) which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2154 
2155 /*!\cond */
// Holds the winner SIMPLE_TRANSLATION candidates on which the (deferred)
// motion mode search will be performed.
typedef struct motion_mode_best_st_candidate {
  // Winner candidates, at most MAX_WINNER_MOTION_MODES of them.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2160 
2161 // Checks if the current reference frame matches with neighbouring block's
2162 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2163 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2164                                                    MB_MODE_INFO *nb_mbmi) {
2165   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2166                                           nb_mbmi->ref_frame[1] };
2167   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2168                                            cur_mbmi->ref_frame[1] };
2169   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2170   int match_found = 0;
2171 
2172   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2173     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2174         (cur_ref_frames[i] == nb_ref_frames[1]))
2175       match_found = 1;
2176   }
2177   return match_found;
2178 }
2179 
// Returns 1 if any block in the row above shares a reference frame with the
// current block (or if no above row is available), 0 otherwise.
static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
                                                  MACROBLOCKD *xd) {
  // Without an above neighbour, return 1 (treated as a match).
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the neighbours above, stepping by each neighbour's width in mi
  // units so that every distinct block is visited exactly once.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    // Intra neighbours carry no reference frames to match against.
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2201 
// Returns 1 if any block in the column to the left shares a reference frame
// with the current block (or if no left column is available), 0 otherwise.
static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
                                                 MACROBLOCKD *xd) {
  // Without a left neighbour, return 1 (treated as a match).
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk the neighbours on the left, stepping by each neighbour's height in
  // mi units so that every distinct block is visited exactly once.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    // Intra neighbours carry no reference frames to match against.
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2223 /*!\endcond */
2224 
2225 /*! \brief Struct used to hold TPL data to
2226  * narrow down parts of the inter mode search.
2227  */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   * INT64_MAX if no valid cost was found.
   */
  int64_t best_inter_cost;
  /*!
   * The accumulated inter cost for each reference frame; 0 for invalid
   * (unused) reference frames.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2238 
2239 #if !CONFIG_REALTIME_ONLY
2240 // TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates per-reference TPL inter costs for the given block and records
// the minimum over the valid references in inter_cost_info_from_tpl.
// Leaves the struct untouched when TPL stats are unavailable.
// TODO(Remya): Check if get_tpl_stats_b() can be reused
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // No TPL stats collected for this frame: nothing to accumulate.
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // TPL stats are stored at a coarser granularity than 4x4 mi units.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted to the superres (upscaled) domain,
  // which is where the TPL stats are indexed.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
2294 #endif
2295 
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2296 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2297     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2298     const PREDICTION_MODE this_mode, int prune_mode_level) {
2299   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2300   if ((prune_mode_level < 2) && have_newmv) return 0;
2301 
2302   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2303   if (best_inter_cost == INT64_MAX) return 0;
2304 
2305   const int prune_level = prune_mode_level - 1;
2306   int64_t cur_inter_cost;
2307 
2308   const int is_globalmv =
2309       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2310   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2311 
2312   // Thresholds used for pruning:
2313   // Lower value indicates aggressive pruning and higher value indicates
2314   // conservative pruning which is set based on ref_mv_idx and speed feature.
2315   // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2316   // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2317   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2318     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2319   };
2320 
2321   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2322   if (!is_comp_pred) {
2323     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2324   } else {
2325     const int64_t inter_cost_ref0 =
2326         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2327     const int64_t inter_cost_ref1 =
2328         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2329     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2330     // more aggressive pruning
2331     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2332   }
2333 
2334   // Prune the mode if cur_inter_cost is greater than threshold times
2335   // best_inter_cost
2336   if (cur_inter_cost >
2337       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2338         best_inter_cost) >>
2339        2))
2340     return 1;
2341   return 0;
2342 }
2343 
2344 /*!\brief High level function to select parameters for compound mode.
2345  *
2346  * \ingroup inter_mode_search
2347  * The main search functionality is done in the call to av1_compound_type_rd().
2348  *
2349  * \param[in]     cpi               Top-level encoder structure.
2350  * \param[in]     x                 Pointer to struct holding all the data for
2351  *                                  the current macroblock.
2352  * \param[in]     args              HandleInterModeArgs struct holding
2353  *                                  miscellaneous arguments for inter mode
2354  *                                  search. See the documentation for this
2355  *                                  struct for a description of each member.
2356  * \param[in]     ref_best_rd       Best RD found so far for this block.
2357  *                                  It is used for early termination of this
2358  *                                  search if the RD exceeds this value.
2359  * \param[in,out] cur_mv            Current motion vector.
2360  * \param[in]     bsize             Current block size.
 * \param[in,out] compmode_interinter_cost  RD of the selected interinter
 *                                  compound mode.
2363  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2364  *                                  allocated buffers for the compound
2365  *                                  predictors and masks in the compound type
2366  *                                  search.
2367  * \param[in,out] orig_dst          A prediction buffer to hold a computed
2368  *                                  prediction. This will eventually hold the
2369  *                                  final prediction, and the tmp_dst info will
2370  *                                  be copied here.
2371  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2372  *                                  computed prediction.
2373  * \param[in,out] rate_mv           The rate associated with the motion vectors.
2374  *                                  This will be modified if a motion search is
2375  *                                  done in the motion mode search.
2376  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2377  *                                  information.
2378  * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2379  *                                  best total RD for a skip mode so far, and
2380  *                                  skip_rd[1] is the best RD for a skip mode so
2381  *                                  far in luma. This is used as a speed feature
2382  *                                  to skip the transform search if the computed
2383  *                                  skip RD for the current mode is not better
2384  *                                  than the best skip_rd so far.
2385  * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2386  *                                  predictor. If this is 0, the inter predictor
2387  *                                  has already been built and thus we can avoid
2388  *                                  repeating computation.
2389  * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2390  * a viable candidate.
2391  */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound (WEDGE/DIFFWTD) requires block-size support and the
  // sequence-level enable flag.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Candidate compound types to consider in the RD search.
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Set by av1_compound_type_rd when it has already built the luma
  // predictor into the dst buffer.
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Early exit: the best compound RD is not within the scaled threshold of
  // the best RD seen so far, so this mode cannot win.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2446 
2447 // Speed feature to prune out MVs that are similar to previous MVs if they
2448 // don't achieve the best RD advantage.
// Speed feature: returns 1 to terminate the current ref_mv_idx evaluation
// when its MV(s) are close to a previously evaluated index's MV(s) and no
// earlier index is already the best; otherwise records this index's MV(s)
// for later comparison and returns 0.
static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
                                   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
                                   MB_MODE_INFO *mbmi, int pruning_factor) {
  const int num_mvs = 1 + has_second_ref(mbmi);
  // Similarity threshold; scales with the pruning aggressiveness and doubles
  // for compound prediction since two MVs contribute to the difference.
  const int similarity_thresh = num_mvs << (pruning_factor + 1);

  // Compare against every previously stored MV set.
  for (int idx = 0; idx < ref_mv_idx; ++idx) {
    if (save_mv[idx][0].as_int == INVALID_MV) continue;

    int sad = 0;
    for (int i = 0; i < num_mvs; ++i) {
      sad += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
             abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
    }

    // If this mode is not the best one, and current MV is similar to
    // previous stored MV, terminate this ref_mv_idx evaluation.
    if (best_ref_mv_idx == -1 && sad <= similarity_thresh) return 1;
  }

  // Remember this index's MV(s) so later indices can be compared against it.
  if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
    for (int i = 0; i < num_mvs; ++i) {
      save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
    }
  }

  return 0;
}
2480 
2481 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2482  *
2483  * \ingroup inter_mode_search
2484  *
2485  * Compares the sse of zero mv and the best sse found in single new_mv. If the
2486  * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2487  * Else returns 0.
2488  *
 * Note that the sse here comes from single_motion_search. So it is
 * interpolated with the filter in motion search, not the actual interpolation
 * filter used in encoding.
2492  *
2493  * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2494  * \param[in]     x                 Pointer to struct holding all the data for
2495  *                                  the current macroblock.
2496  * \param[in]     bsize             The current block_size.
2497  * \param[in]     args              The args to handle_inter_mode, used to track
2498  *                                  the best SSE.
2499  * \param[in]    prune_zero_mv_with_sse  The argument holds speed feature
2500  *                                       prune_zero_mv_with_sse value
2501  * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2502  */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args,int prune_zero_mv_with_sse)2503 static AOM_INLINE int prune_zero_mv_with_sse(
2504     const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2505     const HandleInterModeArgs *args, int prune_zero_mv_with_sse) {
2506   const MACROBLOCKD *xd = &x->e_mbd;
2507   const MB_MODE_INFO *mbmi = xd->mi[0];
2508 
2509   const int is_comp_pred = has_second_ref(mbmi);
2510   const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2511 
2512   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2513     if (xd->global_motion[refs[idx]].wmtype != IDENTITY) {
2514       // Pruning logic only works for IDENTITY type models
2515       // Note: In theory we could apply similar logic for TRANSLATION
2516       // type models, but we do not code these due to a spec bug
2517       // (see comments in gm_get_motion_vector() in av1/common/mv.h)
2518       assert(xd->global_motion[refs[idx]].wmtype != TRANSLATION);
2519       return 0;
2520     }
2521 
2522     // Don't prune if we have invalid data
2523     assert(mbmi->mv[idx].as_int == 0);
2524     if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2525       return 0;
2526     }
2527   }
2528 
2529   // Sum up the sse of ZEROMV and best NEWMV
2530   unsigned int this_sse_sum = 0;
2531   unsigned int best_sse_sum = 0;
2532   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2533     const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2534     const struct macroblockd_plane *pd = xd->plane;
2535     const struct buf_2d *src_buf = &p->src;
2536     const struct buf_2d *ref_buf = &pd->pre[idx];
2537     const uint8_t *src = src_buf->buf;
2538     const uint8_t *ref = ref_buf->buf;
2539     const int src_stride = src_buf->stride;
2540     const int ref_stride = ref_buf->stride;
2541 
2542     unsigned int this_sse;
2543     fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2544     this_sse_sum += this_sse;
2545 
2546     const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2547     best_sse_sum += best_sse;
2548   }
2549 
2550   const double mul = prune_zero_mv_with_sse > 1 ? 1.00 : 1.25;
2551   if ((double)this_sse_sum > (mul * (double)best_sse_sum)) {
2552     return 1;
2553   }
2554 
2555   return 0;
2556 }
2557 
2558 /*!\brief Searches for interpolation filter in realtime mode during winner eval
2559  *
2560  * \ingroup inter_mode_search
2561  *
2562  * Does a simple interpolation filter search during winner mode evaluation. This
2563  * is currently only used by realtime mode as \ref
2564  * av1_interpolation_filter_search is not called during realtime encoding.
2565  *
 * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (> 240p), EIGHTTAP_SMOOTH is also searched.
 *
2570  * \param[in]     cpi               Pointer to the compressor. Used for feature
2571  *                                  flags.
2572  * \param[in,out] x                 Pointer to macroblock. This is primarily
2573  *                                  used to access the buffers.
2574  * \param[in]     mi_row            The current row in mi unit (4X4 pixels).
2575  * \param[in]     mi_col            The current col in mi unit (4X4 pixels).
2576  * \param[in]     bsize             The current block_size.
2577  * \return Returns true if a predictor is built in xd->dst, false otherwise.
2578  */
static AOM_INLINE bool fast_interp_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          int mi_row, int mi_col,
                                          BLOCK_SIZE bsize) {
  // Candidate filter pairs: index 0 is always tried; 1 and 2 are tried
  // depending on resolution (see the loop below).
  static const InterpFilters filters_ref_set[3] = {
    { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
    { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
    { MULTITAP_SHARP, MULTITAP_SHARP }
  };

  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  int64_t best_cost = INT64_MAX;
  int best_filter_index = -1;
  // dst_bufs[0] stores the new predictor, and dst_bufs[1] stores the best
  // predictor found so far; the two are swapped whenever a candidate filter
  // improves on the best cost.
  const int num_planes = av1_num_planes(cm);
  const int is_240p_or_lesser = AOMMIN(cm->width, cm->height) <= 240;
  assert(is_inter_mode(mi->mode));
  assert(mi->motion_mode == SIMPLE_TRANSLATION);
  assert(!is_inter_compound_mode(mi->mode));

  // If no subpel interpolation is needed, the filter choice is irrelevant
  // and no predictor is built here.
  if (!av1_is_interp_needed(xd)) {
    return false;
  }

  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
  const BUFFER_SET *dst_bufs[2] = { &orig_dst, &tmp_dst };

  for (int i = 0; i < 3; ++i) {
    // Low-res clips skip EIGHTTAP_SMOOTH; higher-res clips skip
    // MULTITAP_SHARP, so exactly two filters are evaluated.
    if (is_240p_or_lesser) {
      if (filters_ref_set[i].x_filter == EIGHTTAP_SMOOTH) {
        continue;
      }
    } else {
      if (filters_ref_set[i].x_filter == MULTITAP_SHARP) {
        continue;
      }
    }
    int64_t cost;
    RD_STATS tmp_rd = { 0 };

    // Build the luma predictor with this filter and model its RD cost.
    mi->interp_filters.as_filters = filters_ref_set[i];
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);

    model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
                       ? MODELRD_LEGACY
                       : MODELRD_TYPE_INTERP_FILTER](
        cpi, bsize, x, xd, AOM_PLANE_Y, AOM_PLANE_Y, &tmp_rd.rate, &tmp_rd.dist,
        &tmp_rd.skip_txfm, &tmp_rd.sse, NULL, NULL, NULL);

    // Include the cost of signalling the filter choice.
    tmp_rd.rate += av1_get_switchable_rate(x, xd, cm->features.interp_filter,
                                           cm->seq_params->enable_dual_filter);
    cost = RDCOST(x->rdmult, tmp_rd.rate, tmp_rd.dist);
    if (cost < best_cost) {
      best_filter_index = i;
      best_cost = cost;
      swap_dst_buf(xd, dst_bufs, num_planes);
    }
  }
  assert(best_filter_index >= 0);

  mi->interp_filters.as_filters = filters_ref_set[best_filter_index];

  const bool is_best_pred_in_orig = &orig_dst == dst_bufs[1];

  if (is_best_pred_in_orig) {
    // The best predictor already lives in the original buffer; swap the
    // plane pointers back so xd->plane points at it.
    swap_dst_buf(xd, dst_bufs, num_planes);
  } else {
    // Note that xd->pd's buffers are kept in sync with dst_bufs[0]. So if
    // is_best_pred_in_orig is false, that means the current buffer is the
    // original one.
    assert(&orig_dst == dst_bufs[0]);
    assert(xd->plane[AOM_PLANE_Y].dst.buf == orig_dst.plane[AOM_PLANE_Y]);
    // Copy the best luma predictor from the temporary buffer into the
    // original destination buffer.
    const int width = block_size_wide[bsize];
    const int height = block_size_high[bsize];
#if CONFIG_AV1_HIGHBITDEPTH
    const bool is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd) {
      aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(tmp_dst.plane[AOM_PLANE_Y]),
                               tmp_dst.stride[AOM_PLANE_Y],
                               CONVERT_TO_SHORTPTR(orig_dst.plane[AOM_PLANE_Y]),
                               orig_dst.stride[AOM_PLANE_Y], width, height);
    } else {
      aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                        orig_dst.plane[AOM_PLANE_Y],
                        orig_dst.stride[AOM_PLANE_Y], width, height);
    }
#else
    aom_convolve_copy(tmp_dst.plane[AOM_PLANE_Y], tmp_dst.stride[AOM_PLANE_Y],
                      orig_dst.plane[AOM_PLANE_Y], orig_dst.stride[AOM_PLANE_Y],
                      width, height);
#endif
  }

  // Build the YUV predictor.
  if (num_planes > 1) {
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  AOM_PLANE_U, AOM_PLANE_V);
  }

  return true;
}
2689 
2690 /*!\brief AV1 inter mode RD computation
2691  *
2692  * \ingroup inter_mode_search
2693  * Do the RD search for a given inter mode and compute all information relevant
2694  * to the input mode. It will compute the best MV,
2695  * compound parameters (if the mode is a compound mode) and interpolation filter
2696  * parameters.
2697  *
2698  * \param[in]     cpi               Top-level encoder structure.
2699  * \param[in]     tile_data         Pointer to struct holding adaptive
2700  *                                  data/contexts/models for the tile during
2701  *                                  encoding.
2702  * \param[in]     x                 Pointer to structure holding all the data
2703  *                                  for the current macroblock.
2704  * \param[in]     bsize             Current block size.
2705  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2706  *                                  information.
2707  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2708  *                                  for only the Y plane.
2709  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2710  *                                  for only the UV planes.
2711  * \param[in]     args              HandleInterModeArgs struct holding
2712  *                                  miscellaneous arguments for inter mode
2713  *                                  search. See the documentation for this
2714  *                                  struct for a description of each member.
2715  * \param[in]     ref_best_rd       Best RD found so far for this block.
2716  *                                  It is used for early termination of this
2717  *                                  search if the RD exceeds this value.
2718  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2719  *                                  built in this search.
2720  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2721  *                                  allocated buffers for the compound
2722  *                                  predictors and masks in the compound type
2723  *                                  search.
2724  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2725  *                                  do_tx_search (see below) is 0.
2726  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2727  *                                  a full transform search. This will compute
2728  *                                  an estimated RD for the modes without the
2729  *                                  transform search and later perform the full
2730  *                                  transform search on the best candidates.
2731  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2732  *                                  information to perform a full transform
2733  *                                  search only on winning candidates searched
2734  *                                  with an estimate for transform coding RD.
2735  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2736  *                                  motion mode information used in a speed
2737  *                                  feature to search motion modes other than
2738  *                                  SIMPLE_TRANSLATION only on winning
2739  *                                  candidates.
2740  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2741  *                                  best total RD for a skip mode so far, and
2742  *                                  skip_rd[1] is the best RD for a skip mode so
2743  *                                  far in luma. This is used as a speed feature
2744  *                                  to skip the transform search if the computed
2745  *                                  skip RD for the current mode is not better
2746  *                                  than the best skip_rd so far.
2747  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2748  *                                         narrow down the search based on data
2749  *                                         collected in the TPL model.
2750  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2751  *                                  the luma plane.
2752  *
2753  * \return The RD cost for the mode being searched.
2754  */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

#if CONFIG_REALTIME_ONLY
  const int prune_modes_based_on_tpl = 0;
#else   // CONFIG_REALTIME_ONLY
  const TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // TPL-based pruning is usable only when the speed feature is on and TPL
  // stats have actually been gathered for this GF-group index.
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
#endif  // CONFIG_REALTIME_ONLY
  int i;
  // Reference frames for this mode. A missing second reference (< 0) is
  // mapped to 0 so refs[1] is always a valid array index.
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *pd = xd->plane;
  const BUFFER_SET orig_dst = {
    { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
    { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
  };
  // tmp_buf is carved into one MAX_SB_SQUARE-sized region per plane.
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Running best over all ref_mv_idx candidates searched in the loop below.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  // Per-ref_mv_idx bookkeeping (full-search MV, its error, DRL rate cost).
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref frame
  // is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // Bitmask of the ref_mv_idx values considered worth a full search.
  const int idx_mask =
      ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate cost common to every ref_mv_idx candidate of this mode.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }
  args->start_mv_cnt = 0;

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  //    1.) Get the current MV. Create newmv MV if necessary
  //    2.) Search compound type and parameters if applicable
  //    3.) Do interpolation filter search
  //    4.) Build the inter predictor
  //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //        WARPED_CAUSAL)
  //    6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mbmi->ref_mv_idx = ref_mv_idx;

    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    mode_info[ref_mv_idx].drl_cost = drl_cost;
    mode_info[ref_mv_idx].skip = 0;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    rd_stats->rate += drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      if (newmv_ret_val != 0) continue;

      if (is_inter_singleref_mode(this_mode) &&
          cur_mv[0].as_int != INVALID_MV) {
        const MV_REFERENCE_FRAME ref = refs[0];
        const unsigned int this_sse = x->pred_sse[ref];
        // Track the smallest prediction SSE seen for this reference so far.
        if (this_sse < args->best_single_sse_in_refs[ref]) {
          args->best_single_sse_in_refs[ref] = this_sse;
        }

        if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
          const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
          const int pix_idx = num_pels_log2_lookup[bsize] - 4;
          // Thresholds indexed by [sf strength - 1][num_pels_log2 - 4]:
          // larger blocks (and stronger sf settings) prune more aggressively.
          const double scale_factor[3][11] = {
            { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
            { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
            { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
          };
          assert(pix_idx >= 0);
          assert(th_idx <= 2);
          if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
            continue;
        }
      }

      rd_stats->rate += rate_mv;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Early out if the rate alone already exceeds the best RD; the NEAREST
    // modes are exempt so at least one candidate survives.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

    if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
        (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
      if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args,
                                 cpi->sf.gm_sf.prune_zero_mv_with_sse)) {
        continue;
      }
    }

    // Set by the compound/interp-filter searches when the predictor in
    // orig_dst is already up to date and need not be rebuilt below.
    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, compound_type_rd_time);
#endif
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, compound_type_rd_time);
#endif
      if (not_best_mode) continue;
    }

    if (!args->skip_ifs) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, interpolation_filter_search_time);
#endif
      // Determine the interpolation filter for this mode
      ret_val = av1_interpolation_filter_search(
          x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
          &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, interpolation_filter_search_time);
#endif
      // Record the modelled RD of single-reference modes; compound modes
      // consult these values for the model-based breakout below.
      if (args->modelled_rd != NULL && !is_comp_pred) {
        args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
      }
      if (ret_val != 0) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
                 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
        restore_dst_buf(xd, orig_dst, num_planes);
        continue;
      }

      // Compute modelled RD if enabled
      if (args->modelled_rd != NULL) {
        if (is_comp_pred) {
          const int mode0 = compound_ref0_mode(this_mode);
          const int mode1 = compound_ref1_mode(this_mode);
          const int64_t mrd =
              AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                     args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
          if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
            restore_dst_buf(xd, orig_dst, num_planes);
            continue;
          }
        }
      }
    }

    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the pruning threshold for the remaining ref_mv_idx passes.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
3105 
3106 /*!\brief Search for the best intrabc predictor
3107  *
3108  * \ingroup intra_mode_search
3109  * \callergraph
3110  * This function performs a motion search to find the best intrabc predictor.
3111  *
3112  * \returns Returns the best overall rdcost (including the non-intrabc modes
3113  * search before this function).
3114  */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC is only searched when the tool is allowed for this frame and
  // enabled by the encoder config/speed features.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      !cpi->sf.mv_sf.use_intrabc || cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Pick the DV predictor: prefer nearestmv, fall back to nearmv, and if
  // both are zero derive a default from the tile/superblock position.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the already-coded part of the current frame, so
  // point the prediction buffers at the current frame.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const SEARCH_METHODS search_method =
      av1_get_default_mv_search_method(x, &cpi->sf.mv_sf, bsize);
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, start_mv,
                                     lookahead_search_sites, search_method,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  // Search two source areas: the rows above the current superblock, and the
  // part of the current superblock row to the left of this block.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // Save the direction-specific limits so we can verify that
    // av1_set_mv_search_range() only ever narrows the search area. (The
    // previous asserts here compared each limit against itself and so
    // checked nothing.)
    const FullMvLimits tmp_mv_limits = fullms_params.mv_limits;
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
    assert(fullms_params.mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= tmp_mv_limits.row_max);

    // An empty search range means nothing coded yet in this direction.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;
    FULLPEL_MV_STATS best_mv_stats;

    // Regular full-pel search, then a hash-based search; keep the better DV.
    int bestsme =
        av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL,
                              &best_mv.as_fullmv, &best_mv_stats, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block and build its predictor.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner (which may be the incoming non-intrabc mode).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3306 
// TODO(chiyotsai@google.com): We are using struct names instead of their
3308 // typedef here because Doxygen doesn't know about the typedefs yet. So using
3309 // the typedef will prevent doxygen from finding this function and generating
3310 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3311 // doxygen, we can revert back to using the typedefs.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  // Per-plane rate/distortion outputs filled in by the intra searches below.
  int luma_rate = 0;
  int chroma_rate = 0;
  int luma_rate_tokenonly = 0;
  int chroma_rate_tokenonly = 0;
  uint8_t luma_skip_txfm = 0;
  uint8_t chroma_skip_txfm = 0;
  int64_t luma_dist = 0;
  int64_t chroma_dist = 0;

  // Start from a clean non-intrabc, non-skip-mode intra configuration.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Search intra modes for the luma plane first.
  const int64_t intra_yrd = av1_rd_pick_intra_sby_mode(
      cpi, x, &luma_rate, &luma_rate_tokenonly, &luma_dist, &luma_skip_txfm,
      bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd >= best_rd) {
    // Luma could not beat the incoming best rd; mark the result invalid.
    rd_cost->rate = INT_MAX;
  } else {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &chroma_rate, &chroma_rate_tokenonly,
                                  &chroma_dist, &chroma_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        luma_rate + chroma_rate +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = luma_dist + chroma_dist;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  }

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // Also try intrabc; on success it overwrites *rd_cost and mbmi.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info in the pick-mode context.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3380 
// Forward declaration; the definition appears later in this file.
// NOTE(review): the parameters suggest it combines the above/left neighbor
// prediction buffers for OBMC-related search — confirm at the definition.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride);
3385 
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3386 static AOM_INLINE void rd_pick_skip_mode(
3387     RD_STATS *rd_cost, InterModeSearchState *search_state,
3388     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3389     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3390   const AV1_COMMON *const cm = &cpi->common;
3391   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3392   const int num_planes = av1_num_planes(cm);
3393   MACROBLOCKD *const xd = &x->e_mbd;
3394   MB_MODE_INFO *const mbmi = xd->mi[0];
3395 
3396   x->compound_idx = 1;  // COMPOUND_AVERAGE
3397   RD_STATS skip_mode_rd_stats;
3398   av1_invalid_rd_stats(&skip_mode_rd_stats);
3399 
3400   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3401       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3402     return;
3403   }
3404 
3405   const MV_REFERENCE_FRAME ref_frame =
3406       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3407   const MV_REFERENCE_FRAME second_ref_frame =
3408       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3409   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3410   const THR_MODES mode_index =
3411       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3412 
3413   if (mode_index == THR_INVALID) {
3414     return;
3415   }
3416 
3417   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3418        cpi->sf.inter_sf.disable_onesided_comp) &&
3419       cpi->all_one_sided_refs) {
3420     return;
3421   }
3422 
3423   mbmi->mode = this_mode;
3424   mbmi->uv_mode = UV_DC_PRED;
3425   mbmi->ref_frame[0] = ref_frame;
3426   mbmi->ref_frame[1] = second_ref_frame;
3427   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3428   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3429     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3430     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3431         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3432       return;
3433     }
3434     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3435                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3436                      mbmi_ext->mode_context);
3437     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3438     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3439     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3440   }
3441 
3442   assert(this_mode == NEAREST_NEARESTMV);
3443   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3444     return;
3445   }
3446 
3447   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3448   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3449   mbmi->comp_group_idx = 0;
3450   mbmi->compound_idx = x->compound_idx;
3451   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3452   mbmi->motion_mode = SIMPLE_TRANSLATION;
3453   mbmi->ref_mv_idx = 0;
3454   mbmi->skip_mode = mbmi->skip_txfm = 1;
3455   mbmi->palette_mode_info.palette_size[0] = 0;
3456   mbmi->palette_mode_info.palette_size[1] = 0;
3457 
3458   set_default_interp_filters(mbmi, cm->features.interp_filter);
3459 
3460   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3461   for (int i = 0; i < num_planes; i++) {
3462     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3463     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3464   }
3465 
3466   BUFFER_SET orig_dst;
3467   for (int i = 0; i < num_planes; i++) {
3468     orig_dst.plane[i] = xd->plane[i].dst.buf;
3469     orig_dst.stride[i] = xd->plane[i].dst.stride;
3470   }
3471 
3472   // Compare the use of skip_mode with the best intra/inter mode obtained.
3473   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3474   int64_t best_intra_inter_mode_cost = INT64_MAX;
3475   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3476     const ModeCosts *mode_costs = &x->mode_costs;
3477     best_intra_inter_mode_cost = RDCOST(
3478         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3479         rd_cost->dist);
3480     // Account for non-skip mode rate in total rd stats
3481     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3482     av1_rd_cost_update(x->rdmult, rd_cost);
3483   }
3484 
3485   // Obtain the rdcost for skip_mode.
3486   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst,
3487                best_intra_inter_mode_cost);
3488 
3489   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3490       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3491     assert(mode_index != THR_INVALID);
3492     search_state->best_mbmode.skip_mode = 1;
3493     search_state->best_mbmode = *mbmi;
3494     memset(search_state->best_mbmode.inter_tx_size,
3495            search_state->best_mbmode.tx_size,
3496            sizeof(search_state->best_mbmode.inter_tx_size));
3497     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3498                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3499                   xd);
3500     search_state->best_mode_index = mode_index;
3501 
3502     // Update rd_cost
3503     rd_cost->rate = skip_mode_rd_stats.rate;
3504     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3505     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3506 
3507     search_state->best_rd = rd_cost->rdcost;
3508     search_state->best_skip2 = 1;
3509     search_state->best_mode_skippable = 1;
3510 
3511     x->txfm_search_info.skip_txfm = 1;
3512   }
3513 }
3514 
3515 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3516 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3517     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3518     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3519     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3520     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3521     int mode_idx) {
3522   MB_MODE_INFO *winner_mbmi;
3523   if (multi_winner_mode_type) {
3524     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3525     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3526     winner_mbmi = &winner_mode_stat->mbmi;
3527 
3528     *winner_rd_cost = &winner_mode_stat->rd_cost;
3529     *winner_rate_y = winner_mode_stat->rate_y;
3530     *winner_rate_uv = winner_mode_stat->rate_uv;
3531     *winner_mode_index = winner_mode_stat->mode_index;
3532   } else {
3533     winner_mbmi = best_mbmode;
3534     *winner_rd_cost = best_rd_cost;
3535     *winner_rate_y = best_rate_y;
3536     *winner_rate_uv = best_rate_uv;
3537     *winner_mode_index = *best_mode_index;
3538   }
3539   return winner_mbmi;
3540 }
3541 
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
//
// Re-evaluates up to 'winner_mode_count' winner-mode candidates with the full
// transform search parameters, and updates *best_mbmode, *best_mode_index,
// *best_skip2, 'ctx' and 'rd_cost' whenever a refined candidate achieves a
// lower rd cost than the current best.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
                                         rd_cost->skip_txfm))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Refinement is skipped for lossless segments and invalid candidates.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
                                          rd_cost->skip_txfm)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Load the candidate's mode info so the tx search operates on it.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        bool is_predictor_built = false;
        const PREDICTION_MODE prediction_mode = mbmi->mode;
        // Do interpolation filter search for realtime mode if applicable.
        if (cpi->sf.winner_mode_sf.winner_mode_ifs &&
            cpi->oxcf.mode == REALTIME &&
            cm->current_frame.reference_mode == SINGLE_REFERENCE &&
            is_inter_mode(prediction_mode) &&
            mbmi->motion_mode == SIMPLE_TRANSLATION &&
            !is_inter_compound_mode(prediction_mode)) {
          is_predictor_built =
              fast_interp_search(cpi, x, mi_row, mi_col, bsize);
        }
        // fast_interp_search may have already produced the predictor; avoid
        // building it a second time.
        if (!is_predictor_built) {
          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                        av1_num_planes(cm) - 1);
        }
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          // Recursive tx partition search for the luma plane.
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          // Uniform tx size: propagate the single tx size / skip decision to
          // every 4x4 unit of the block.
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      const ModeCosts *mode_costs = &x->mode_costs;
      // Compare coding the residual vs. signalling a skip block; convert to a
      // skip block when that is cheaper in rd terms.
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Total rate: replace the candidate's previous y/uv rates with the
      // refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // Refined candidate wins: commit mode info, tx decisions and rd stats.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3686 
/*!\cond */
// Per-block mask describing which prediction modes and reference-frame
// combinations the inter mode search should skip.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3699 
3700 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3701 static AOM_INLINE void disable_reference(
3702     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3703   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3704     ref_combo[ref][ref2 + 1] = true;
3705   }
3706 }
3707 
3708 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3709 static AOM_INLINE void disable_inter_references_except_altref(
3710     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3711   disable_reference(LAST_FRAME, ref_combo);
3712   disable_reference(LAST2_FRAME, ref_combo);
3713   disable_reference(LAST3_FRAME, ref_combo);
3714   disable_reference(GOLDEN_FRAME, ref_combo);
3715   disable_reference(BWDREF_FRAME, ref_combo);
3716   disable_reference(ALTREF2_FRAME, ref_combo);
3717 }
3718 
// Fixed list of (first, second) reference-frame pairs tried when the reduced
// reference set is enabled; NONE_FRAME as the second entry denotes a
// single-reference combination.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Which set of reference-frame combinations the mode search may consider.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3731 
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3732 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3733                                          REF_SET ref_set) {
3734   if (ref_set == REF_SET_FULL) {
3735     // Everything available by default.
3736     memset(mask, 0, sizeof(*mask));
3737   } else {
3738     // All modes available by default.
3739     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3740     // All references disabled first.
3741     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3742       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3743         mask->ref_combo[ref1][ref2 + 1] = true;
3744       }
3745     }
3746     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3747     int num_ref_combos;
3748 
3749     // Then enable reduced set of references explicitly.
3750     switch (ref_set) {
3751       case REF_SET_REDUCED:
3752         ref_set_combos = reduced_ref_combos;
3753         num_ref_combos =
3754             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3755         break;
3756       case REF_SET_REALTIME:
3757         ref_set_combos = real_time_ref_combos;
3758         num_ref_combos =
3759             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3760         break;
3761       default: assert(0); num_ref_combos = 0;
3762     }
3763 
3764     for (int i = 0; i < num_ref_combos; ++i) {
3765       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3766       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3767     }
3768   }
3769 }
3770 
// Builds the per-block mode/reference skip mask applied before inter mode
// search: selects the active reference set, disables unavailable or
// segment-excluded references, and prunes modes for references with poor
// predicted-MV SAD according to the active speed features.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &sf->inter_sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // A set mask bit means "do NOT try this mode": NEAR/NEAREST stay
      // enabled only when they coincide with the global MV.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (inter_sf->alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      // Fast path: search only ALTREF (all its modes), dropping the other
      // inter references and intra.
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (inter_sf->alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold: best past-frame pred-MV SAD plus 12.5%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      MV_REFERENCE_FRAME start_frame =
          inter_sf->alt_ref_search_fp == 1 ? ALTREF2_FRAME : BWDREF_FRAME;
      for (ref_frame = start_frame; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0) {
          // Prune inter modes when relative dist of ALTREF2 and ALTREF is close
          // to the relative dist of LAST_FRAME.
          if (inter_sf->alt_ref_search_fp == 1 &&
              (abs(cpi->ref_frame_dist_info
                       .ref_relative_dist[ref_frame - LAST_FRAME]) >
               1.5 * abs(cpi->ref_frame_dist_info
                             .ref_relative_dist[LAST_FRAME - LAST_FRAME]))) {
            continue;
          }
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
        }
      }
    }
  }

  if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
    if (x->best_pred_mv_sad[0] < INT_MAX) {
      // Threshold: best past-frame pred-MV SAD plus 50%.
      int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
      const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };

      // Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
      for (int ref_idx = 0; ref_idx < 2; ref_idx++) {
        ref_frame = prune_ref_list[ref_idx];
        if (x->pred_mv_sad[ref_frame] > sad_thresh)
          mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
  }

  // Disable intra entirely for blocks above the speed-feature size limit.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  if (!cpi->oxcf.tool_cfg.enable_global_motion) {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      mask->pred_modes[ref_frame] |= (1 << GLOBALMV);
      mask->pred_modes[ref_frame] |= (1 << GLOBAL_GLOBALMV);
    }
  }

  // Restrict intra (y) modes to the speed-feature mask for this block's
  // maximum transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];

  // Prune reference frames which are not the closest to the current
  // frame and with large pred_mv_sad.
  if (inter_sf->prune_single_ref) {
    assert(inter_sf->prune_single_ref > 0 && inter_sf->prune_single_ref < 3);
    const double prune_threshes[2] = { 1.20, 1.05 };

    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefFrameDistanceInfo *const ref_frame_dist_info =
          &cpi->ref_frame_dist_info;
      const int is_closest_ref =
          (ref_frame == ref_frame_dist_info->nearest_past_ref) ||
          (ref_frame == ref_frame_dist_info->nearest_future_ref);

      if (!is_closest_ref) {
        // dir: 0 selects the best past-frame SAD, 1 the best future-frame SAD.
        const int dir =
            (ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] < 0)
                ? 0
                : 1;
        if (x->best_pred_mv_sad[dir] < INT_MAX &&
            x->pred_mv_sad[ref_frame] >
                prune_threshes[inter_sf->prune_single_ref - 1] *
                    x->best_pred_mv_sad[dir])
          mask->pred_modes[ref_frame] |= INTER_SINGLE_ALL;
      }
    }
  }
}
3942 
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3943 static AOM_INLINE void init_neighbor_pred_buf(
3944     const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3945     int is_hbd) {
3946   if (is_hbd) {
3947     const int len = sizeof(uint16_t);
3948     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3949     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3950                                                  (MAX_SB_SQUARE >> 1) * len);
3951     args->above_pred_buf[2] =
3952         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3953     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3954     args->left_pred_buf[1] =
3955         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3956     args->left_pred_buf[2] =
3957         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3958   } else {
3959     args->above_pred_buf[0] = obmc_buffer->above_pred;
3960     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3961     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3962     args->left_pred_buf[0] = obmc_buffer->left_pred;
3963     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3964     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3965   }
3966 }
3967 
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3968 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3969                                       MV_REFERENCE_FRAME ref_frame) {
3970   const AV1_COMMON *const cm = &cpi->common;
3971   MV_REFERENCE_FRAME rf[2];
3972   av1_set_ref_frame(rf, ref_frame);
3973 
3974   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3975 
3976   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3977                                        cm->cur_frame->ref_display_order_hint)) {
3978     return 1;
3979   }
3980 
3981   return 0;
3982 }
3983 
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3984 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3985     int ref_frame, int skip_ref_frame_mask) {
3986   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3987     if (!(skip_ref_frame_mask & (1 << r))) {
3988       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3989       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3990         return 1;
3991       }
3992     }
3993   }
3994   return 0;
3995 }
3996 
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3997 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3998                                                  const MB_MODE_INFO *mi_cache) {
3999   if (!mi_cache) {
4000     return 0;
4001   }
4002 
4003   if (ref_frame < REF_FRAMES) {
4004     return (ref_frame == mi_cache->ref_frame[0] ||
4005             ref_frame == mi_cache->ref_frame[1]);
4006   }
4007 
4008   // if we are here, then the current mode is compound.
4009   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
4010   return ref_frame == cached_ref_type;
4011 }
4012 
4013 // Please add/modify parameter setting in this function, making it consistent
4014 // and easy to read and maintain.
// One-stop setup for the inter-mode RD search of the current block:
// estimates reference-frame signaling costs, populates per-reference MV
// predictor state (single and, when allowed, compound), prepares OBMC
// neighbor predictions, and initializes the mode-skip mask and evaluation
// parameters. Outputs are written into *args, *mode_skip_mask,
// ref_costs_single/ref_costs_comp, yv12_mb, and fields of *x.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // best_pred_mv_sad[0]/[1] track the best SAD over past / future references
  // respectively; start at "no reference seen".
  x->best_pred_mv_sad[0] = INT_MAX;
  x->best_pred_mv_sad[1] = INT_MAX;

  // Single-reference pass: reset per-reference MV context and, for each
  // usable reference, set up its prediction buffers and ref-mv list.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Aggregate pred_mv_sad only when a speed feature that consumes it is
    // enabled; skipped references contribute INT_MAX and are thus neutral.
    if (cpi->sf.inter_sf.alt_ref_search_fp ||
        cpi->sf.inter_sf.prune_single_ref ||
        cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
      // Store the best pred_mv_sad across all past frames
      if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
          0)
        x->best_pred_mv_sad[0] =
            AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
      else
        // Store the best pred_mv_sad across all future frames
        x->best_pred_mv_sad[1] =
            AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
    }
  }

  // Compound-reference pass: populate MV context for each allowed reference
  // pair, unless the RT ref set (single-reference only) is in use.
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent single references must be enabled for this pair.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  int use_actual_frame_probs = 1;
  // prune_obmc is assigned on every path: when CONFIG_FPMT_TEST forces the
  // simulated probabilities, the #if branch sets it; otherwise
  // use_actual_frame_probs remains 1 and the branch below sets it.
  int prune_obmc;
#if CONFIG_FPMT_TEST
  use_actual_frame_probs =
      (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
  if (!use_actual_frame_probs) {
    prune_obmc = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
#endif
  if (use_actual_frame_probs) {
    prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                 cpi->sf.inter_sf.prune_obmc_prob_thresh;
  }
  // Build above/left neighbor predictions needed by OBMC only when OBMC is
  // enabled, not pruned, and applicable to this block.
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  // No best single-reference SSE recorded yet for any reference.
  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
4148 
init_single_inter_mode_search_state(InterModeSearchState * search_state)4149 static AOM_INLINE void init_single_inter_mode_search_state(
4150     InterModeSearchState *search_state) {
4151   for (int dir = 0; dir < 2; ++dir) {
4152     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
4153       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
4154         SingleInterModeState *state;
4155 
4156         state = &search_state->single_state[dir][mode][ref_frame];
4157         state->ref_frame = NONE_FRAME;
4158         state->rd = INT64_MAX;
4159 
4160         state = &search_state->single_state_modelled[dir][mode][ref_frame];
4161         state->ref_frame = NONE_FRAME;
4162         state->rd = INT64_MAX;
4163 
4164         search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
4165       }
4166     }
4167   }
4168 
4169   for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
4170     search_state->best_single_rd[ref_frame] = INT64_MAX;
4171     search_state->best_single_mode[ref_frame] = PRED_MODE_INVALID;
4172   }
4173   av1_zero(search_state->single_state_cnt);
4174   av1_zero(search_state->single_state_modelled_cnt);
4175 }
4176 
// Initialize *search_state for a fresh inter-mode RD search of one block:
// best-so-far trackers start at "invalid/worst", per-mode RD thresholds are
// derived from the frame's rd.threshes scaled by the block's adaptive
// frequency factors, and the modelled/simple RD tables start at INT64_MAX.
// Compound-related state is only initialized when the frame allows compound
// prediction.
static AOM_INLINE void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  // Seed the search with the caller's best RD so worse modes prune early.
  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV-class modes up to LAST_NEW_MV_INDEX always get a zero threshold
  // (never pruned by threshold); the remaining single-reference modes get
  // the frame threshold scaled by the block's adaptive frequency factor.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < SINGLE_REF_MODE_END; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  // Invalidate cached RDs for every single inter mode / ref-mv / reference.
  for (int i = SINGLE_INTER_MODE_START; i < SINGLE_INTER_MODE_END; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }

  // Compound-mode state is only needed when the frame permits more than
  // single-reference prediction.
  if (cpi->common.current_frame.reference_mode != SINGLE_REFERENCE) {
    for (int i = SINGLE_REF_MODE_END; i < THR_INTER_MODE_END; ++i)
      search_state->mode_threshold[i] =
          ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
          RD_THRESH_FAC_FRAC_BITS;

    // Invalidate cached RDs for the compound inter modes as well.
    for (int i = COMP_INTER_MODE_START; i < COMP_INTER_MODE_END; ++i) {
      for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
        for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
          search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
          search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
        }
      }
    }

    init_single_inter_mode_search_state(search_state);
  }
}
4254 
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)4255 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
4256                            const MV_REFERENCE_FRAME *ref_frame,
4257                            const PREDICTION_MODE this_mode) {
4258   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
4259     return true;
4260   }
4261 
4262   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
4263 }
4264 
// Returns 1 when the (mode, reference pair) combination cannot legally be
// evaluated for this block, 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const AV1_COMMON *const cm = &cpi->common;

  if (ref_frames[1] > INTRA_FRAME) {
    // Compound prediction: block size, second reference availability and
    // frame-level reference mode must all permit it.
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the inter mode must support it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    if (!is_interintra_allowed_bsize(bsize) ||
        !is_interintra_allowed_mode(curr_mode))
      return 1;
  }

  return 0;
}
4297 
// Returns the union of the ref-frame masks recorded for every mi unit the
// current block covers inside its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_mask = mib_size - 1;
  // Block position relative to the top-left of the containing superblock.
  const int row_start = xd->mi_row & sb_mask;
  const int col_start = xd->mi_col & sb_mask;
  const int num_rows = mi_size_high[bsize];
  const int num_cols = mi_size_wide[bsize];

  int mask = 0;
  // picked_ref_frames_mask is laid out as a 32-entry-wide grid of mi units.
  for (int r = row_start; r < row_start + num_rows; ++r) {
    for (int c = col_start; c < col_start + num_cols; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4316 
4317 // Check if reference frame pair of the current block matches with the given
4318 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4319 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4320                                        const MV_REFERENCE_FRAME *ref_frames) {
4321   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4322           (ref_frames[1] == mbmi->ref_frame[1]));
4323 }
4324 
4325 // Case 1: return 0, means don't skip this mode
4326 // Case 2: return 1, means skip this mode completely
4327 // Case 3: return 2, means skip compound only, but still try single motion modes
// Order-independent pruning of one (mode, reference) candidate.
// Returns 0 to evaluate the mode normally, 1 to skip it entirely, or 2 to
// skip only its motion-mode search (the simple-translation variant is still
// evaluated, e.g. because a compound mode needs its motion result).
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (!cpi->sf.rt_sf.use_real_time_ref_set)
    if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        // A cached compound NEWMV component needs the single-reference motion
        // search result for the corresponding reference, so keep the simple
        // search for that reference and skip only its motion-mode search.
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 -> skip completely, 2 -> skip motion-mode search only.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Rows: aggressiveness level (speed feature value - 1).
      // Columns: qindex sub-range (low/mid/high third of QINDEX_RANGE).
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4479 
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4480 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4481                              const MV_REFERENCE_FRAME *ref_frames,
4482                              const AV1_COMMON *cm) {
4483   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4484   mbmi->ref_mv_idx = 0;
4485   mbmi->mode = curr_mode;
4486   mbmi->uv_mode = UV_DC_PRED;
4487   mbmi->ref_frame[0] = ref_frames[0];
4488   mbmi->ref_frame[1] = ref_frames[1];
4489   pmi->palette_size[0] = 0;
4490   pmi->palette_size[1] = 0;
4491   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4492   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4493   mbmi->motion_mode = SIMPLE_TRANSLATION;
4494   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4495   set_default_interp_filters(mbmi, cm->features.interp_filter);
4496 }
4497 
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4498 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4499                                              InterModeSearchState *search_state,
4500                                              const MB_MODE_INFO *const mbmi) {
4501   int i, j;
4502   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4503   const PREDICTION_MODE this_mode = mbmi->mode;
4504   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4505   const int mode_offset = INTER_OFFSET(this_mode);
4506   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4507 
4508   // Simple rd
4509   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4510   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4511     const int64_t rd =
4512         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4513     if (rd < simple_rd) simple_rd = rd;
4514   }
4515 
4516   // Insertion sort of single_state
4517   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4518   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4519   i = search_state->single_state_cnt[dir][mode_offset];
4520   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4521     state_s[j] = state_s[j - 1];
4522   state_s[j] = this_state_s;
4523   search_state->single_state_cnt[dir][mode_offset]++;
4524 
4525   // Modelled rd
4526   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4527   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4528     const int64_t rd =
4529         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4530     if (rd < modelled_rd) modelled_rd = rd;
4531   }
4532 
4533   // Insertion sort of single_state_modelled
4534   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4535   SingleInterModeState *state_m =
4536       search_state->single_state_modelled[dir][mode_offset];
4537   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4538   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4539     state_m[j] = state_m[j - 1];
4540   state_m[j] = this_state_m;
4541   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4542 }
4543 
// Post-process the collected single-reference results: (1) invalidate
// reference frames whose RD is far worse than the best NEWMV/GLOBALMV RD in
// the same direction, and (2) build single_rd_order -- the per-direction,
// per-mode reference ranking used by compound pruning -- ordered by simple
// RD first and backfilled with modelled-RD entries.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level -> tighter factor -> more refs invalidated.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    // Note i starts at 1: index 0 (the best entry per mode) is always kept.
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        // Invalidate when rd/8 * prune_factor exceeds the direction's best.
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same pruning applied independently to the modelled-RD list.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take all valid entries from the simple-RD list, in RD order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Backfill with modelled-RD entries not already listed and not
      // invalidated in the simple-RD list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4632 
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4633 static int compound_skip_get_candidates(
4634     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4635     const int dir, const PREDICTION_MODE mode) {
4636   const int mode_offset = INTER_OFFSET(mode);
4637   const SingleInterModeState *state =
4638       search_state->single_state[dir][mode_offset];
4639   const SingleInterModeState *state_modelled =
4640       search_state->single_state_modelled[dir][mode_offset];
4641 
4642   int max_candidates = 0;
4643   for (int i = 0; i < FWD_REFS; ++i) {
4644     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4645     max_candidates++;
4646   }
4647 
4648   int candidates = max_candidates;
4649   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4650     candidates = AOMMIN(2, max_candidates);
4651   }
4652   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4653     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4654         state[0].ref_frame == state_modelled[0].ref_frame)
4655       candidates = 1;
4656     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4657   }
4658 
4659   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4660     // Limit the number of candidates to 1 in each direction for compound
4661     // prediction
4662     candidates = AOMMIN(1, candidates);
4663   }
4664   return candidates;
4665 }
4666 
// Decide whether to prune a compound mode (this_mode with refs
// {ref_frame, second_ref_frame}) based on the single-reference search
// results. Returns 1 to skip the compound mode, 0 to evaluate it.
// A component reference is only used for pruning when it was actually
// searched in single mode and (for NEAREST/NEAR components) its single-mode
// MVs match the MVs the compound mode would use.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Single-mode component of the compound mode for each reference.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction 0: forward refs (<= GOLDEN); direction 1: backward refs.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Was each component reference searched as a single-reference mode?
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEAREST/NEAR components, verify that the single-mode MV equals the
  // MV the compound mode would use for every ref-mv candidate; otherwise the
  // single-mode RD is not predictive of the compound component.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune unless every usable component reference ranks among the allowed
  // top single-reference candidates for its direction.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4731 
4732 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4733 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4734                                    const MV_REFERENCE_FRAME *ref_frames,
4735                                    int *const is_ref_match) {
4736   if (is_inter_block(mbmi)) {
4737     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4738     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4739     if (has_second_ref(mbmi)) {
4740       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4741       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4742     }
4743   }
4744 }
4745 
4746 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4747 static INLINE int compound_skip_using_neighbor_refs(
4748     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4749     const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4750   // Exclude non-extended compound modes from pruning
4751   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4752       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4753     return 0;
4754 
4755   if (prune_ext_comp_using_neighbors >= 3) return 1;
4756 
4757   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4758                                 // 1 - match for backward refs
4759   // Check if ref frames of this block matches with left neighbor.
4760   if (xd->left_available)
4761     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4762 
4763   // Check if ref frames of this block matches with above neighbor.
4764   if (xd->up_available)
4765     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4766 
4767   // Combine ref frame match with neighbors in forward and backward refs.
4768   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4769 
4770   // Pruning based on ref frame match with neighbors.
4771   if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4772   return 1;
4773 }
4774 
4775 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4776 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4777                                            const PREDICTION_MODE this_mode,
4778                                            const MV_REFERENCE_FRAME ref_frame,
4779                                            int64_t this_rd) {
4780   if (this_rd < search_state->best_single_rd[ref_frame]) {
4781     search_state->best_single_rd[ref_frame] = this_rd;
4782     search_state->best_single_mode[ref_frame] = this_mode;
4783   }
4784 }
4785 
4786 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4787 static INLINE int skip_compound_using_best_single_mode_ref(
4788     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4789     const PREDICTION_MODE *best_single_mode,
4790     int prune_comp_using_best_single_mode_ref) {
4791   // Exclude non-extended compound modes from pruning
4792   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4793       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4794     return 0;
4795 
4796   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4797   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4798   // Get ref frame direction corresponding to NEWMV
4799   // 0 - NEWMV corresponding to forward direction
4800   // 1 - NEWMV corresponding to backward direction
4801   const int newmv_dir = comp_mode_ref0 != NEWMV;
4802 
4803   // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4804   // have NEWMV as single mode winner.
4805   // Example: For an extended-compound mode,
4806   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4807   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4808   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4809   //   ALTREF_FRAME is NEWMV
4810   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4811   if (single_mode == NEWMV) return 0;
4812 
4813   // Avoid pruning the compound mode when best single mode is not available
4814   if (prune_comp_using_best_single_mode_ref == 1)
4815     if (single_mode == MB_MODE_COUNT) return 0;
4816   return 1;
4817 }
4818 
// qsort() comparator ordering int64_t values ascending.
// Fix: the original cast the const void * arguments to non-const int64_t *,
// silently discarding the const qualifier (-Wcast-qual). Use const-correct
// casts; comparison logic is unchanged (no subtraction, so no signed
// overflow).
static int compare_int64(const void *a, const void *b) {
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  if (a64 < b64) {
    return -1;
  } else if (a64 == b64) {
    return 0;
  } else {
    return 1;
  }
}
4830 
// Record a new overall best inter mode: copy its RD stats, mode info and
// per-block transform decisions into search_state / ctx.
// - new_best_rd_stats(_y/_uv): RD stats of the winning mode (luma/chroma).
// - new_best_mode: THR_MODES index of the winner.
// - txfm_search_done: nonzero iff a real transform search (not an RD
//   estimate) produced the stats; gates the rate_y/rate_uv update below.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm only applies to inter winners: intra modes never skip.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Persist the winner's per-4x4 transform-skip flags and tx types so the
  // final encode can replay them.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4865 
4866 // Find the best RD for a reference frame (among single reference modes)
4867 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4868 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4869   assert(ref_frame_rd[0] == INT64_MAX);
4870   int64_t ref_copy[REF_FRAMES - 1];
4871   memcpy(ref_copy, ref_frame_rd + 1,
4872          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4873   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4874 
4875   int64_t cutoff = ref_copy[0];
4876   // The cut-off is within 10% of the best.
4877   if (cutoff != INT64_MAX) {
4878     assert(cutoff < INT64_MAX / 200);
4879     cutoff = (110 * cutoff) / 100;
4880   }
4881   ref_frame_rd[0] = cutoff;
4882 }
4883 
4884 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4885 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4886                                         MV_REFERENCE_FRAME frame1,
4887                                         MV_REFERENCE_FRAME frame2) {
4888   assert(frame2 > 0);
4889   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4890          ref_frame_rd[frame2] <= ref_frame_rd[0];
4891 }
4892 
// Re-evaluate the stored simple-translation winner candidates with a full
// motion mode search (motion_mode_rd) and update the overall best mode if a
// candidate improves on search_state->best_rd.
// - best_motion_mode_cands: list built earlier by handle_winner_cand.
// - do_tx_search: forwarded to motion_mode_rd / update_search_state; when 0
//   the RD stats are estimates only.
// - yrd: output, luma-only RD cost of the new best mode (if updated).
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rates saved at insertion time.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the current destination buffers; motion_mode_rd needs the
    // original (pre-prediction) destination.
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction sources at the candidate's reference planes.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // INT64_MAX from motion_mode_rd means the candidate was abandoned.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4970 
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output: set by skip_inter_mode when only the motion-mode search (not the
  // whole mode) should be skipped.
  int *skip_motion_mode;
  // Per-mode skip mask consulted by inter_mode_search_order_independent_skip.
  mode_skip_mask_t *mode_skip_mask;
  // Shared search state (best RD so far, single-mode stats, thresholds).
  InterModeSearchState *search_state;
  // Bitmask of reference frames to skip.
  int skip_ref_frame_mask;
  // Flag: set once the first compound mode is reached and single-mode stats
  // have been analyzed (analyze_single_states).
  int reach_first_comp_mode;
  // Multiplier (Q MODE_THRESH_QBITS) applied to mode thresholds when the best
  // mode so far is skippable, to prune more aggressively.
  int mode_thresh_mul_fact;
  // Count of single-reference modes processed; compared against
  // NUM_SINGLE_REF_MODES to trigger find_top_ref.
  int num_single_modes_processed;
  // Flag: set once find_top_ref has computed the single-ref RD cutoff.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4984 
// Decide whether the inter mode at position midx in the default mode order
// should be skipped, applying the active speed-feature pruning heuristics in
// sequence. Returns 1 to skip the mode, 0 to evaluate it. As a side effect,
// *args->skip_motion_mode is set when only the motion-mode search should be
// bypassed, and single-mode statistics are analyzed once the first compound
// mode is reached.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args, int is_low_temp_var) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled elsewhere; skip them here.
  if (ref_frame == INTRA_FRAME) return 1;

  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  if (sf->inter_sf.skip_arf_compound && update_type == ARF_UPDATE &&
      comp_pred) {
    return 1;
  }

  // This is for real time encoding.
  if (is_low_temp_var && !comp_pred && ref_frame != LAST_FRAME &&
      this_mode != NEARESTMV)
    return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: keep the mode but skip its
  // motion-mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning.
  // Disable this pruning logic if interpolation filter search was skipped for
  // single prediction modes as it can result in aggressive pruning of compound
  // prediction modes due to the absence of modelled_rd populated by
  // av1_interpolation_filter_search().
  // TODO(Remya): Check the impact of the sf
  // 'prune_comp_search_by_single_result' if compound prediction modes are
  // enabled in future for REALTIME encode.
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  // mode_threshold is stored in Q MODE_THRESH_QBITS fixed point.
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (!sf->interp_sf.skip_interp_filter_search &&
      sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(
            x, this_mode, ref_frame_type,
            args->search_state->best_mbmode.mode)) {
      // Ensure the mode is pruned only when the current block has obtained a
      // valid inter mode.
      assert(is_inter_mode(args->search_state->best_mbmode.mode));
      return 1;
    }
  }

  if (sf->rt_sf.prune_inter_modes_with_golden_ref &&
      ref_frame == GOLDEN_FRAME && !comp_pred) {
    const int subgop_size = AOMMIN(cpi->ppi->gf_group.size, FIXED_GF_INTERVAL);
    if (cpi->rc.frames_since_golden > (subgop_size >> 2) &&
        args->search_state->best_mbmode.ref_frame[0] != GOLDEN_FRAME) {
      if ((bsize > BLOCK_16X16 && this_mode == NEWMV) || this_mode == NEARMV)
        return 1;
    }
  }

  return 0;
}
5116 
// Track the best RD seen for each reference-mode hypothesis
// (single / compound / mode-select), so the frame-level reference mode can be
// chosen later. Note: single_rate (rate without the compound-mode signaling
// cost) is used for both the SINGLE_REFERENCE and COMPOUND_REFERENCE buckets.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive rates with and without the compound-mode signaling cost.
  int64_t single_rate, hybrid_rate;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Update the bucket matching this block's prediction type.
  const REFERENCE_MODE pred_type =
      comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
5144 
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// Candidates are visited in increasing estimated-RD order; several speed
// features bound how many candidates / how many per prediction mode get a
// real transform search. On return, search_state holds the best mode found
// and *yrd its luma-only RD cost.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates by estimated RD so the loop below visits best-first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed feature limit.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[5] = { INT_MAX, 10, 9, 6, 2 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 4);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[4] = { INT_MAX, 4, 3, 0 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 3);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    const PREDICTION_MODE prediction_mode = mbmi->mode;
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once a candidate's estimate is more than 25% worse than the best
    // estimate (curr * 0.8 > top  <=>  curr > top * 1.25).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Past the per-mode threshold, allow at most one more tx search per
    // prediction mode (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((prediction_mode != NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 1) ||
          (prediction_mode == NEARESTMV &&
           num_tx_search_modes[prediction_mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // NOTE(review): is_predictor_built is never set to true, so the
    // "if (!is_predictor_built)" guard below is always taken — looks
    // vestigial; confirm against upstream before simplifying.
    bool is_predictor_built = false;

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    const int txfm_rd_gate_level = get_txfm_rd_gate_level(
        cm->seq_params->enable_masked_compound,
        cpi->sf.inter_sf.txfm_rd_gate_level, bsize, TX_SEARCH_DEFAULT,
        /*eval_motion_mode=*/0);
    if (txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm = check_txfm_eval(x, bsize, search_state->best_skip_rd[0],
                                      skip_rd, txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    // Build the prediction for this mode
    if (!is_predictor_built) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(prediction_mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[prediction_mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD: when the whole block skips transform, the luma rate
      // collapses to the skip flag's cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        prediction_mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
5331 
// Indicates number of winner simple translation modes to be used
// (presumably indexed by the motion_mode_for_winner_cand speed-feature
// level; 0 disables the list — confirm against the speed-feature setup).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
5334 
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by ascending rd_cost and capped at
// max_winner_motion_mode_cand entries; a worse-than-all candidate is
// simply dropped when the list is full.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  // (first entry with a strictly larger rd_cost; defaults to list end).
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse entries one slot right to open the insertion point; the
    // AOMMIN bound discards the last entry when the list is already full.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    // Fill in the scratch candidate, then copy it into its slot.
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5377 
5378 /*!\brief Search intra modes in interframes
5379  *
5380  * \ingroup intra_mode_search
5381  *
5382  * This function searches for the best intra mode when the current frame is an
5383  * interframe. This function however does *not* handle luma palette mode.
5384  * Palette mode is currently handled by \ref av1_search_palette_mode.
5385  *
5386  * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
5388  * for the best chroma mode. Because palette mode is currently not handled by
5389  * here, a cache of uv mode is stored in
5390  * InterModeSearchState::intra_search_state so it can be reused later by \ref
5391  * av1_search_palette_mode.
5392  *
 * \param[in,out] search_state      Struct keeping track of the prediction mode
5394  *                                  search state in interframe.
5395  *
5396  * \param[in]     cpi               Top-level encoder structure.
5397  * \param[in,out] x                 Pointer to struct holding all the data for
5398  *                                  the current prediction block.
5399  * \param[out]    rd_cost           Stores the best rd_cost among all the
5400  *                                  prediction modes searched.
5401  * \param[in]     bsize             Current block size.
5402  * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
 *                                  copy the tx_type and txfm_skip arrays
 *                                  for only the Y plane.
5405  * \param[in]     sf_args           Stores the list of intra mode candidates
5406  *                                  to be searched.
5407  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5408  *                                      current ref frame is an intra frame.
5409  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5410  *                                  terminate chroma intra mode search.
5411  *
5412  * \remark If a new best mode is found, search_state and rd_costs are updated
5413  * correspondingly. While x is also modified, it is only used as a temporary
5414  * buffer, and the final decisions are stored in search_state.
5415  */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Running best across the luma-only search below.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // Stop immediately if an earlier pruning stage decided intra modes can
    // be skipped entirely for this block.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(
        mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip smooth/paeth modes disabled by configuration or speed features.
    if ((!intra_mode_cfg->enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
      continue;
    // Non-zero delta angles are only searched when angle deltas are allowed
    // for this block size and enabled in the configuration.
    if (av1_is_directional_mode(mbmi->mode) &&
        !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
        mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    // Evaluate this luma intra mode; returns whether a valid RD was produced.
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only luma results beating yrd_threshold may become the winner and
    // trigger the chroma search below.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode beat the threshold: leave search_state untouched.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  // Add the chroma mode signaling cost when this block carries chroma.
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // This intra result also bounds the best RD of every reference mode class.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  // Promote to overall best mode if it beats the current best RD.
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5596 
5597 #if !CONFIG_REALTIME_ONLY
5598 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5599 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5600 static AOM_INLINE void calculate_cost_from_tpl_data(
5601     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5602     int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5603   const AV1_COMMON *const cm = &cpi->common;
5604   // Only consider full SB.
5605   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5606   const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5607   const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5608                   (block_size_high[sb_size] / tpl_bsize_1d);
5609   SuperBlockEnc *sb_enc = &x->sb_enc;
5610   if (sb_enc->tpl_data_count == len) {
5611     const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5612     const int tpl_stride = sb_enc->tpl_stride;
5613     const int tplw = mi_size_wide[tpl_bsize];
5614     const int tplh = mi_size_high[tpl_bsize];
5615     const int nw = mi_size_wide[bsize] / tplw;
5616     const int nh = mi_size_high[bsize] / tplh;
5617     if (nw >= 1 && nh >= 1) {
5618       const int of_h = mi_row % mi_size_high[sb_size];
5619       const int of_w = mi_col % mi_size_wide[sb_size];
5620       const int start = of_h / tplh * tpl_stride + of_w / tplw;
5621 
5622       for (int k = 0; k < nh; k++) {
5623         for (int l = 0; l < nw; l++) {
5624           *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5625           *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5626         }
5627       }
5628       *inter_cost /= nw * nh;
5629       *intra_cost /= nw * nh;
5630     }
5631   }
5632 }
5633 #endif  // !CONFIG_REALTIME_ONLY
5634 
5635 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5636 // intra mode search.
skip_intra_modes_in_interframe(AV1_COMMON * const cm,struct macroblock * x,BLOCK_SIZE bsize,InterModeSearchState * search_state,const SPEED_FEATURES * const sf,int64_t inter_cost,int64_t intra_cost)5637 static AOM_INLINE void skip_intra_modes_in_interframe(
5638     AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5639     InterModeSearchState *search_state, const SPEED_FEATURES *const sf,
5640     int64_t inter_cost, int64_t intra_cost) {
5641   MACROBLOCKD *const xd = &x->e_mbd;
5642   const int comp_pred = search_state->best_mbmode.ref_frame[1] > INTRA_FRAME;
5643   if (sf->rt_sf.prune_intra_mode_based_on_mv_range &&
5644       bsize > sf->part_sf.max_intra_bsize && !comp_pred) {
5645     const MV best_mv = search_state->best_mbmode.mv[0].as_mv;
5646     const int mv_thresh = 16 << sf->rt_sf.prune_intra_mode_based_on_mv_range;
5647     if (abs(best_mv.row) < mv_thresh && abs(best_mv.col) < mv_thresh &&
5648         x->source_variance > 128) {
5649       search_state->intra_search_state.skip_intra_modes = 1;
5650       return;
5651     }
5652   }
5653 
5654   const unsigned int src_var_thresh_intra_skip = 1;
5655   const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
5656   if (!(skip_intra_in_interframe &&
5657         (x->source_variance > src_var_thresh_intra_skip)))
5658     return;
5659 
5660   // Prune intra search based on best inter mode being transfrom skip.
5661   if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5662     const int qindex_thresh[2] = { 200, MAXQ };
5663     const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5664     if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5665         (x->qindex <= qindex_thresh[ind])) {
5666       search_state->intra_search_state.skip_intra_modes = 1;
5667       return;
5668     } else if ((skip_intra_in_interframe >= 4) &&
5669                (inter_cost < 0 || intra_cost < 0)) {
5670       search_state->intra_search_state.skip_intra_modes = 1;
5671       return;
5672     }
5673   }
5674   // Use ML model to prune intra search.
5675   if (inter_cost >= 0 && intra_cost >= 0) {
5676     const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5677                                      ? &av1_intrap_nn_config
5678                                      : &av1_intrap_hd_nn_config;
5679     float nn_features[6];
5680     float scores[2] = { 0.0f };
5681 
5682     nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5683     nn_features[1] = (float)mi_size_wide_log2[bsize];
5684     nn_features[2] = (float)mi_size_high_log2[bsize];
5685     nn_features[3] = (float)intra_cost;
5686     nn_features[4] = (float)inter_cost;
5687     const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5688     const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5689     nn_features[5] = (float)(ac_q_max / ac_q);
5690 
5691     av1_nn_predict(nn_features, nn_config, 1, scores);
5692 
5693     // For two parameters, the max prob returned from av1_nn_softmax equals
5694     // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
5695     // calling of av1_nn_softmax.
5696     const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5697     assert(skip_intra_in_interframe <= 5);
5698     if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5699       search_state->intra_search_state.skip_intra_modes = 1;
5700     }
5701   }
5702 }
5703 
skip_interp_filter_search(const AV1_COMP * cpi,int is_single_pred)5704 static AOM_INLINE bool skip_interp_filter_search(const AV1_COMP *cpi,
5705                                                  int is_single_pred) {
5706   const MODE encoding_mode = cpi->oxcf.mode;
5707   if (encoding_mode == REALTIME) {
5708     return (cpi->common.current_frame.reference_mode == SINGLE_REFERENCE &&
5709             (cpi->sf.interp_sf.skip_interp_filter_search ||
5710              cpi->sf.winner_mode_sf.winner_mode_ifs));
5711   } else if (encoding_mode == GOOD) {
5712     // Skip interpolation filter search for single prediction modes.
5713     return (cpi->sf.interp_sf.skip_interp_filter_search && is_single_pred);
5714   }
5715   return false;
5716 }
5717 
get_block_temp_var(const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize)5718 static AOM_INLINE int get_block_temp_var(const AV1_COMP *cpi,
5719                                          const MACROBLOCK *x,
5720                                          BLOCK_SIZE bsize) {
5721   const AV1_COMMON *const cm = &cpi->common;
5722   const SPEED_FEATURES *const sf = &cpi->sf;
5723 
5724   if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION ||
5725       !sf->rt_sf.short_circuit_low_temp_var ||
5726       !sf->rt_sf.prune_inter_modes_using_temp_var) {
5727     return 0;
5728   }
5729 
5730   const int mi_row = x->e_mbd.mi_row;
5731   const int mi_col = x->e_mbd.mi_col;
5732   int is_low_temp_var = 0;
5733 
5734   if (cm->seq_params->sb_size == BLOCK_64X64)
5735     is_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
5736         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5737   else
5738     is_low_temp_var = av1_get_force_skip_low_temp_var(
5739         &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
5740 
5741   return is_low_temp_var;
5742 }
5743 
5744 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5745 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5746                             struct macroblock *x, struct RD_STATS *rd_cost,
5747                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5748                             int64_t best_rd_so_far) {
5749   AV1_COMMON *const cm = &cpi->common;
5750   const FeatureFlags *const features = &cm->features;
5751   const int num_planes = av1_num_planes(cm);
5752   const SPEED_FEATURES *const sf = &cpi->sf;
5753   MACROBLOCKD *const xd = &x->e_mbd;
5754   MB_MODE_INFO *const mbmi = xd->mi[0];
5755   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5756   int i;
5757   const ModeCosts *mode_costs = &x->mode_costs;
5758   const int *comp_inter_cost =
5759       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5760 
5761   InterModeSearchState search_state;
5762   init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5763   INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5764     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5765     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5766   };
5767   HandleInterModeArgs args = { { NULL },
5768                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5769                                { NULL },
5770                                { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5771                                  MAX_SB_SIZE >> 1 },
5772                                NULL,
5773                                NULL,
5774                                NULL,
5775                                search_state.modelled_rd,
5776                                INT_MAX,
5777                                INT_MAX,
5778                                search_state.simple_rd,
5779                                0,
5780                                false,
5781                                interintra_modes,
5782                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5783                                { { 0, 0 } },
5784                                { 0 },
5785                                0,
5786                                0,
5787                                -1,
5788                                -1,
5789                                -1,
5790                                { 0 },
5791                                { 0 },
5792                                UINT_MAX };
5793   // Currently, is_low_temp_var is used in real time encoding.
5794   const int is_low_temp_var = get_block_temp_var(cpi, x, bsize);
5795 
5796   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5797   // Indicates the appropriate number of simple translation winner modes for
5798   // exhaustive motion mode evaluation
5799   const int max_winner_motion_mode_cand =
5800       num_winner_motion_modes[sf->winner_mode_sf.motion_mode_for_winner_cand];
5801   assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5802   motion_mode_candidate motion_mode_cand;
5803   motion_mode_best_st_candidate best_motion_mode_cands;
5804   // Initializing the number of motion mode candidates to zero.
5805   best_motion_mode_cands.num_motion_mode_cand = 0;
5806   for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5807     best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5808 
5809   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5810 
5811   av1_invalid_rd_stats(rd_cost);
5812 
5813   for (i = 0; i < REF_FRAMES; ++i) {
5814     x->warp_sample_info[i].num = -1;
5815   }
5816 
5817   // Ref frames that are selected by square partition blocks.
5818   int picked_ref_frames_mask = 0;
5819   if (sf->inter_sf.prune_ref_frame_for_rect_partitions &&
5820       mbmi->partition != PARTITION_NONE) {
5821     // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5822     // partition blocks. prune_ref_frame_for_rect_partitions >=2
5823     // implies prune for vert, horiz and extended partition blocks.
5824     if ((mbmi->partition != PARTITION_VERT &&
5825          mbmi->partition != PARTITION_HORZ) ||
5826         sf->inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5827       picked_ref_frames_mask =
5828           fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
5829     }
5830   }
5831 
5832 #if CONFIG_COLLECT_COMPONENT_TIMING
5833   start_timing(cpi, set_params_rd_pick_inter_mode_time);
5834 #endif
5835   // Skip ref frames that never selected by square blocks.
5836   const int skip_ref_frame_mask =
5837       picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5838   mode_skip_mask_t mode_skip_mask;
5839   unsigned int ref_costs_single[REF_FRAMES];
5840   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5841   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5842   // init params, set frame modes, speed features
5843   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5844                                 skip_ref_frame_mask, ref_costs_single,
5845                                 ref_costs_comp, yv12_mb);
5846 #if CONFIG_COLLECT_COMPONENT_TIMING
5847   end_timing(cpi, set_params_rd_pick_inter_mode_time);
5848 #endif
5849 
5850   int64_t best_est_rd = INT64_MAX;
5851   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5852   // If do_tx_search is 0, only estimated RD should be computed.
5853   // If do_tx_search is 1, all modes have TX search performed.
5854   const int do_tx_search =
5855       !((sf->inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5856         (sf->inter_sf.inter_mode_rd_model_estimation == 2 &&
5857          num_pels_log2_lookup[bsize] > 8));
5858   InterModesInfo *inter_modes_info = x->inter_modes_info;
5859   inter_modes_info->num = 0;
5860 
5861   // Temporary buffers used by handle_inter_mode().
5862   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5863 
5864   // The best RD found for the reference frame, among single reference modes.
5865   // Note that the 0-th element will contain a cut-off that is later used
5866   // to determine if we should skip a compound mode.
5867   int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5868                                        INT64_MAX, INT64_MAX, INT64_MAX,
5869                                        INT64_MAX, INT64_MAX };
5870 
5871   // Prepared stats used later to check if we could skip intra mode eval.
5872   int64_t inter_cost = -1;
5873   int64_t intra_cost = -1;
5874   // Need to tweak the threshold for hdres speed 0 & 1.
5875   const int mi_row = xd->mi_row;
5876   const int mi_col = xd->mi_col;
5877 
5878   // Obtain the relevant tpl stats for pruning inter modes
5879   PruneInfoFromTpl inter_cost_info_from_tpl;
5880 #if !CONFIG_REALTIME_ONLY
5881   if (sf->inter_sf.prune_inter_modes_based_on_tpl) {
5882     // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5883     // prune_ref_by_selective_ref_frame()
5884     // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5885     // prune_ref_by_selective_ref_frame()
5886     // Populating valid_refs[idx] = 1 ensures that
5887     // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5888     // pruned ref frame.
5889     int valid_refs[INTER_REFS_PER_FRAME];
5890     for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5891       const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5892       valid_refs[frame - 1] =
5893           x->tpl_keep_ref_frame[frame] ||
5894           !prune_ref_by_selective_ref_frame(
5895               cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5896     }
5897     av1_zero(inter_cost_info_from_tpl);
5898     get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5899                               &inter_cost_info_from_tpl);
5900   }
5901 
5902   const int do_pruning =
5903       (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5904   if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
5905       cpi->oxcf.algo_cfg.enable_tpl_model)
5906     calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
5907                                  &intra_cost);
5908 #endif  // !CONFIG_REALTIME_ONLY
5909 
5910   // Initialize best mode stats for winner mode processing.
5911   const int max_winner_mode_count =
5912       winner_mode_count_allowed[sf->winner_mode_sf.multi_winner_mode_type];
5913   zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
5914   x->winner_mode_count = 0;
5915   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5916                           NULL, bsize, best_rd_so_far,
5917                           sf->winner_mode_sf.multi_winner_mode_type, 0);
5918 
5919   int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5920   if (sf->inter_sf.prune_inter_modes_if_skippable) {
5921     // Higher multiplication factor values for lower quantizers.
5922     mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5923   }
5924 
5925   // Initialize arguments for mode loop speed features
5926   InterModeSFArgs sf_args = { &args.skip_motion_mode,
5927                               &mode_skip_mask,
5928                               &search_state,
5929                               skip_ref_frame_mask,
5930                               0,
5931                               mode_thresh_mul_fact,
5932                               0,
5933                               0 };
5934   int64_t best_inter_yrd = INT64_MAX;
5935 
5936   // This is the main loop of this function. It loops over all possible inter
5937   // modes and calls handle_inter_mode() to compute the RD for each.
5938   // Here midx is just an iterator index that should not be used by itself
5939   // except to keep track of the number of modes searched. It should be used
5940   // with av1_default_mode_order to get the enum that defines the mode, which
5941   // can be used with av1_mode_defs to get the prediction mode and the ref
5942   // frames.
5943   // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
5944   // good speedup for real time case. If we decide to use compound mode in real
5945   // time, maybe we can modify av1_default_mode_order table.
5946   THR_MODES mode_start = THR_INTER_MODE_START;
5947   THR_MODES mode_end = THR_INTER_MODE_END;
5948   const CurrentFrame *const current_frame = &cm->current_frame;
5949   if (current_frame->reference_mode == SINGLE_REFERENCE) {
5950     mode_start = SINGLE_REF_MODE_START;
5951     mode_end = SINGLE_REF_MODE_END;
5952   }
5953 
5954   for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
5955     // Get the actual prediction mode we are trying in this iteration
5956     const THR_MODES mode_enum = av1_default_mode_order[midx];
5957     const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5958     const PREDICTION_MODE this_mode = mode_def->mode;
5959     const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5960 
5961     const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5962     const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5963     const int is_single_pred =
5964         ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5965     const int comp_pred = second_ref_frame > INTRA_FRAME;
5966 
5967     init_mbmi(mbmi, this_mode, ref_frames, cm);
5968 
5969     txfm_info->skip_txfm = 0;
5970     sf_args.num_single_modes_processed += is_single_pred;
5971     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5972 #if CONFIG_COLLECT_COMPONENT_TIMING
5973     start_timing(cpi, skip_inter_mode_time);
5974 #endif
5975     // Apply speed features to decide if this inter mode can be skipped
5976     const int is_skip_inter_mode = skip_inter_mode(
5977         cpi, x, bsize, ref_frame_rd, midx, &sf_args, is_low_temp_var);
5978 #if CONFIG_COLLECT_COMPONENT_TIMING
5979     end_timing(cpi, skip_inter_mode_time);
5980 #endif
5981     if (is_skip_inter_mode) continue;
5982 
5983     // Select prediction reference frames.
5984     for (i = 0; i < num_planes; i++) {
5985       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5986       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5987     }
5988 
5989     mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5990     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5991     mbmi->filter_intra_mode_info.use_filter_intra = 0;
5992     mbmi->ref_mv_idx = 0;
5993 
5994     const int64_t ref_best_rd = search_state.best_rd;
5995     RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5996     av1_init_rd_stats(&rd_stats);
5997 
5998     const int ref_frame_cost = comp_pred
5999                                    ? ref_costs_comp[ref_frame][second_ref_frame]
6000                                    : ref_costs_single[ref_frame];
6001     const int compmode_cost =
6002         is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
6003     const int real_compmode_cost =
6004         cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
6005             ? compmode_cost
6006             : 0;
6007     // Point to variables that are maintained between loop iterations
6008     args.single_newmv = search_state.single_newmv;
6009     args.single_newmv_rate = search_state.single_newmv_rate;
6010     args.single_newmv_valid = search_state.single_newmv_valid;
6011     args.single_comp_cost = real_compmode_cost;
6012     args.ref_frame_cost = ref_frame_cost;
6013     args.best_pred_sse = search_state.best_pred_sse;
6014     args.skip_ifs = skip_interp_filter_search(cpi, is_single_pred);
6015 
6016     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
6017                            search_state.best_skip_rd[1] };
6018     int64_t this_yrd = INT64_MAX;
6019 #if CONFIG_COLLECT_COMPONENT_TIMING
6020     start_timing(cpi, handle_inter_mode_time);
6021 #endif
6022     int64_t this_rd = handle_inter_mode(
6023         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
6024         ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
6025         inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
6026         &this_yrd);
6027 #if CONFIG_COLLECT_COMPONENT_TIMING
6028     end_timing(cpi, handle_inter_mode_time);
6029 #endif
6030     if (current_frame->reference_mode != SINGLE_REFERENCE) {
6031       if (!args.skip_ifs &&
6032           sf->inter_sf.prune_comp_search_by_single_result > 0 &&
6033           is_inter_singleref_mode(this_mode)) {
6034         collect_single_states(x, &search_state, mbmi);
6035       }
6036 
6037       if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
6038           is_inter_singleref_mode(this_mode))
6039         update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
6040     }
6041 
6042     if (this_rd == INT64_MAX) continue;
6043 
6044     if (mbmi->skip_txfm) {
6045       rd_stats_y.rate = 0;
6046       rd_stats_uv.rate = 0;
6047     }
6048 
6049     if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
6050         this_rd < ref_frame_rd[ref_frame]) {
6051       ref_frame_rd[ref_frame] = this_rd;
6052     }
6053 
6054     // Did this mode help, i.e., is it the new best mode
6055     if (this_rd < search_state.best_rd) {
6056       assert(IMPLIES(comp_pred,
6057                      cm->current_frame.reference_mode != SINGLE_REFERENCE));
6058       search_state.best_pred_sse = x->pred_sse[ref_frame];
6059       best_inter_yrd = this_yrd;
6060       update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
6061                           &rd_stats_uv, mode_enum, x, do_tx_search);
6062       if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
6063       // skip_rd[0] is the best total rd for a skip mode so far.
6064       // skip_rd[1] is the best total rd for a skip mode so far in luma.
6065       // When do_tx_search = 1, both skip_rd[0] and skip_rd[1] are updated.
6066       // When do_tx_search = 0, skip_rd[1] is updated.
6067       search_state.best_skip_rd[1] = skip_rd[1];
6068     }
6069     if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6070       // Add this mode to motion mode candidate list for motion mode search
6071       // if using motion_mode_for_winner_cand speed feature
6072       handle_winner_cand(mbmi, &best_motion_mode_cands,
6073                          max_winner_motion_mode_cand, this_rd,
6074                          &motion_mode_cand, args.skip_motion_mode);
6075     }
6076 
6077     /* keep record of best compound/single-only prediction */
6078     record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
6079                          x->rdmult, &search_state, compmode_cost);
6080   }
6081 
6082 #if CONFIG_COLLECT_COMPONENT_TIMING
6083   start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6084 #endif
6085   if (sf->winner_mode_sf.motion_mode_for_winner_cand) {
6086     // For the single ref winner candidates, evaluate other motion modes (non
6087     // simple translation).
6088     evaluate_motion_mode_for_winner_candidates(
6089         cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
6090         &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
6091         &search_state, &best_inter_yrd);
6092   }
6093 #if CONFIG_COLLECT_COMPONENT_TIMING
6094   end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
6095 #endif
6096 
6097 #if CONFIG_COLLECT_COMPONENT_TIMING
6098   start_timing(cpi, do_tx_search_time);
6099 #endif
6100   if (do_tx_search != 1) {
6101     // A full tx search has not yet been done, do tx search for
6102     // top mode candidates
6103     tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
6104                                     yv12_mb, mi_row, mi_col, &search_state,
6105                                     rd_cost, ctx, &best_inter_yrd);
6106   }
6107 #if CONFIG_COLLECT_COMPONENT_TIMING
6108   end_timing(cpi, do_tx_search_time);
6109 #endif
6110 
6111 #if CONFIG_COLLECT_COMPONENT_TIMING
6112   start_timing(cpi, handle_intra_mode_time);
6113 #endif
6114   // Gate intra mode evaluation if best of inter is skip except when source
6115   // variance is extremely low and also based on max intra bsize.
6116   skip_intra_modes_in_interframe(cm, x, bsize, &search_state, sf, inter_cost,
6117                                  intra_cost);
6118 
6119   const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
6120   search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
6121                                    &sf_args, intra_ref_frame_cost,
6122                                    best_inter_yrd);
6123 #if CONFIG_COLLECT_COMPONENT_TIMING
6124   end_timing(cpi, handle_intra_mode_time);
6125 #endif
6126 
6127 #if CONFIG_COLLECT_COMPONENT_TIMING
6128   start_timing(cpi, refine_winner_mode_tx_time);
6129 #endif
6130   int winner_mode_count =
6131       sf->winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
6132   // In effect only when fast tx search speed features are enabled.
6133   refine_winner_mode_tx(
6134       cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
6135       &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
6136       search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
6137 #if CONFIG_COLLECT_COMPONENT_TIMING
6138   end_timing(cpi, refine_winner_mode_tx_time);
6139 #endif
6140 
6141   // Initialize default mode evaluation params
6142   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
6143 
6144   // Only try palette mode when the best mode so far is an intra mode.
6145   const int try_palette =
6146       cpi->oxcf.tool_cfg.enable_palette &&
6147       av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
6148       !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
6149   RD_STATS this_rd_cost;
6150   int this_skippable = 0;
6151   if (try_palette) {
6152 #if CONFIG_COLLECT_COMPONENT_TIMING
6153     start_timing(cpi, av1_search_palette_mode_time);
6154 #endif
6155     this_skippable = av1_search_palette_mode(
6156         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
6157         ctx, &this_rd_cost, search_state.best_rd);
6158 #if CONFIG_COLLECT_COMPONENT_TIMING
6159     end_timing(cpi, av1_search_palette_mode_time);
6160 #endif
6161     if (this_rd_cost.rdcost < search_state.best_rd) {
6162       search_state.best_mode_index = THR_DC;
6163       mbmi->mv[0].as_int = 0;
6164       rd_cost->rate = this_rd_cost.rate;
6165       rd_cost->dist = this_rd_cost.dist;
6166       rd_cost->rdcost = this_rd_cost.rdcost;
6167       search_state.best_rd = rd_cost->rdcost;
6168       search_state.best_mbmode = *mbmi;
6169       search_state.best_skip2 = 0;
6170       search_state.best_mode_skippable = this_skippable;
6171       memcpy(ctx->blk_skip, txfm_info->blk_skip,
6172              sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
6173       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
6174     }
6175   }
6176 
6177   search_state.best_mbmode.skip_mode = 0;
6178   if (cm->current_frame.skip_mode_info.skip_mode_flag &&
6179       is_comp_ref_allowed(bsize)) {
6180     const struct segmentation *const seg = &cm->seg;
6181     unsigned char segment_id = mbmi->segment_id;
6182     if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
6183       rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
6184     }
6185   }
6186 
6187   // Make sure that the ref_mv_idx is only nonzero when we're
6188   // using a mode which can support ref_mv_idx
6189   if (search_state.best_mbmode.ref_mv_idx != 0 &&
6190       !(search_state.best_mbmode.mode == NEWMV ||
6191         search_state.best_mbmode.mode == NEW_NEWMV ||
6192         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
6193     search_state.best_mbmode.ref_mv_idx = 0;
6194   }
6195 
6196   if (search_state.best_mode_index == THR_INVALID ||
6197       search_state.best_rd >= best_rd_so_far) {
6198     rd_cost->rate = INT_MAX;
6199     rd_cost->rdcost = INT64_MAX;
6200     return;
6201   }
6202 
6203   const InterpFilter interp_filter = features->interp_filter;
6204   assert((interp_filter == SWITCHABLE) ||
6205          (interp_filter ==
6206           search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
6207          !is_inter_block(&search_state.best_mbmode));
6208   assert((interp_filter == SWITCHABLE) ||
6209          (interp_filter ==
6210           search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
6211          !is_inter_block(&search_state.best_mbmode));
6212 
6213   if (!cpi->rc.is_src_frame_alt_ref && sf->inter_sf.adaptive_rd_thresh) {
6214     av1_update_rd_thresh_fact(
6215         cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
6216         search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
6217   }
6218 
6219   // macroblock modes
6220   *mbmi = search_state.best_mbmode;
6221   txfm_info->skip_txfm |= search_state.best_skip2;
6222 
6223   // Note: this section is needed since the mode may have been forced to
6224   // GLOBALMV by the all-zero mode handling of ref-mv.
6225   if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
6226     // Correct the interp filters for GLOBALMV
6227     if (is_nontrans_global_motion(xd, xd->mi[0])) {
6228       int_interpfilters filters =
6229           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
6230       assert(mbmi->interp_filters.as_int == filters.as_int);
6231       (void)filters;
6232     }
6233   }
6234 
6235   txfm_info->skip_txfm |= search_state.best_mode_skippable;
6236 
6237   assert(search_state.best_mode_index != THR_INVALID);
6238 
6239 #if CONFIG_INTERNAL_STATS
6240   store_coding_context(x, ctx, search_state.best_mode_index,
6241                        search_state.best_mode_skippable);
6242 #else
6243   store_coding_context(x, ctx, search_state.best_mode_skippable);
6244 #endif  // CONFIG_INTERNAL_STATS
6245 
6246   if (mbmi->palette_mode_info.palette_size[1] > 0) {
6247     assert(try_palette);
6248     av1_restore_uv_color_map(cpi, x);
6249   }
6250 }
6251 
/*
 * RD mode selection for blocks in a segment with SEG_LVL_SKIP active:
 * no residual is coded, so the block is forced to GLOBALMV with the
 * segment's mandated reference frame (or LAST_FRAME) and the matching
 * global-motion MV. The only search performed is over switchable
 * interpolation filters. On success rd_cost holds the signaling rate with
 * zero distortion; if that RD cost does not beat best_rd_so_far,
 * rd_cost->rate is set to INT_MAX to signal failure to the caller.
 */
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // This path always codes single prediction.
  int i;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;  // No residual is accounted for here.
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the segment's mandated reference if SEG_LVL_REF_FRAME is active;
  // otherwise default to LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The block MV is the global-motion vector of the chosen reference frame.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters and distortion fixed at zero, the winner is
    // simply the filter with the cheapest signaling rate.
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): the cost added is always ref_costs_single[LAST_FRAME],
  // even when SEG_LVL_REF_FRAME forced a different reference above —
  // confirm this is intentional.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal failure if this skip mode cannot beat the best RD found so far.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, 0);
#else
  store_coding_context(x, ctx, 0);
#endif  // CONFIG_INTERNAL_STATS
}
6383 
/*!\cond */
// Context passed through foreach_overlappable_nb_above/left to the
// calc_target_weighted_pred_above/left callbacks below.
struct calc_target_weighted_pred_ctxt {
  // Holds the wsrc/mask accumulation buffers being filled.
  const OBMCBuffer *obmc_buffer;
  // Neighbor predictor samples (above row or left column border).
  const uint8_t *tmp;
  // Stride of 'tmp' in samples.
  int tmp_stride;
  // Extent of the OBMC blend: rows for the above pass, cols for the left.
  int overlap;
};
/*!\endcond */
6392 
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6393 static INLINE void calc_target_weighted_pred_above(
6394     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6395     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6396   (void)nb_mi;
6397   (void)num_planes;
6398   (void)rel_mi_row;
6399   (void)dir;
6400 
6401   struct calc_target_weighted_pred_ctxt *ctxt =
6402       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6403 
6404   const int bw = xd->width << MI_SIZE_LOG2;
6405   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6406 
6407   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6408   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6409   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6410   const int is_hbd = is_cur_buf_hbd(xd);
6411 
6412   if (!is_hbd) {
6413     for (int row = 0; row < ctxt->overlap; ++row) {
6414       const uint8_t m0 = mask1d[row];
6415       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6416       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6417         wsrc[col] = m1 * tmp[col];
6418         mask[col] = m0;
6419       }
6420       wsrc += bw;
6421       mask += bw;
6422       tmp += ctxt->tmp_stride;
6423     }
6424   } else {
6425     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6426 
6427     for (int row = 0; row < ctxt->overlap; ++row) {
6428       const uint8_t m0 = mask1d[row];
6429       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6430       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6431         wsrc[col] = m1 * tmp16[col];
6432         mask[col] = m0;
6433       }
6434       wsrc += bw;
6435       mask += bw;
6436       tmp16 += ctxt->tmp_stride;
6437     }
6438   }
6439 }
6440 
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6441 static INLINE void calc_target_weighted_pred_left(
6442     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6443     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6444   (void)nb_mi;
6445   (void)num_planes;
6446   (void)rel_mi_col;
6447   (void)dir;
6448 
6449   struct calc_target_weighted_pred_ctxt *ctxt =
6450       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6451 
6452   const int bw = xd->width << MI_SIZE_LOG2;
6453   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6454 
6455   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6456   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6457   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6458   const int is_hbd = is_cur_buf_hbd(xd);
6459 
6460   if (!is_hbd) {
6461     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6462       for (int col = 0; col < ctxt->overlap; ++col) {
6463         const uint8_t m0 = mask1d[col];
6464         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6465         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6466                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6467         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6468       }
6469       wsrc += bw;
6470       mask += bw;
6471       tmp += ctxt->tmp_stride;
6472     }
6473   } else {
6474     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6475 
6476     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6477       for (int col = 0; col < ctxt->overlap; ++col) {
6478         const uint8_t m0 = mask1d[col];
6479         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6480         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6481                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6482         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6483       }
6484       wsrc += bw;
6485       mask += bw;
6486       tmp16 += ctxt->tmp_stride;
6487     }
6488   }
6489 }
6490 
6491 // This function has a structure similar to av1_build_obmc_inter_prediction
6492 //
6493 // The OBMC predictor is computed as:
6494 //
6495 //  PObmc(x,y) =
6496 //    AOM_BLEND_A64(Mh(x),
6497 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6498 //                  PLeft(x, y))
6499 //
6500 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6501 // rounding, this can be written as:
6502 //
6503 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6504 //    Mh(x) * Mv(y) * P(x,y) +
6505 //      Mh(x) * Cv(y) * Pabove(x,y) +
6506 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6507 //
6508 // Where :
6509 //
6510 //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6512 //
6513 // This function computes 'wsrc' and 'mask' as:
6514 //
6515 //  wsrc(x, y) =
6516 //    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
6517 //      Mh(x) * Cv(y) * Pabove(x,y) +
6518 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6519 //
6520 //  mask(x, y) = Mh(x) * Mv(y)
6521 //
6522 // These can then be used to efficiently approximate the error for any
6523 // predictor P in the context of the provided neighbouring predictors by
6524 // computing:
6525 //
6526 //  error(x, y) =
6527 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6528 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6529 static AOM_INLINE void calc_target_weighted_pred(
6530     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6531     const uint8_t *above, int above_stride, const uint8_t *left,
6532     int left_stride) {
6533   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6534   const int bw = xd->width << MI_SIZE_LOG2;
6535   const int bh = xd->height << MI_SIZE_LOG2;
6536   const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6537   int32_t *mask_buf = obmc_buffer->mask;
6538   int32_t *wsrc_buf = obmc_buffer->wsrc;
6539 
6540   const int is_hbd = is_cur_buf_hbd(xd);
6541   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6542 
6543   // plane 0 should not be sub-sampled
6544   assert(xd->plane[0].subsampling_x == 0);
6545   assert(xd->plane[0].subsampling_y == 0);
6546 
6547   av1_zero_array(wsrc_buf, bw * bh);
6548   for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6549 
6550   // handle above row
6551   if (xd->up_available) {
6552     const int overlap =
6553         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6554     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6555                                                    above_stride, overlap };
6556     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6557                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
6558                                   calc_target_weighted_pred_above, &ctxt);
6559   }
6560 
6561   for (int i = 0; i < bw * bh; ++i) {
6562     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6563     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6564   }
6565 
6566   // handle left column
6567   if (xd->left_available) {
6568     const int overlap =
6569         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6570     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6571                                                    left_stride, overlap };
6572     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6573                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
6574                                  calc_target_weighted_pred_left, &ctxt);
6575   }
6576 
6577   if (!is_hbd) {
6578     const uint8_t *src = x->plane[0].src.buf;
6579 
6580     for (int row = 0; row < bh; ++row) {
6581       for (int col = 0; col < bw; ++col) {
6582         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6583       }
6584       wsrc_buf += bw;
6585       src += x->plane[0].src.stride;
6586     }
6587   } else {
6588     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6589 
6590     for (int row = 0; row < bh; ++row) {
6591       for (int col = 0; col < bw; ++col) {
6592         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6593       }
6594       wsrc_buf += bw;
6595       src += x->plane[0].src.stride;
6596     }
6597   }
6598 }
6599