• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "av1/common/pred_common.h"
13 #include "av1/encoder/interp_search.h"
14 #include "av1/encoder/model_rd.h"
15 #include "av1/encoder/rdopt_utils.h"
16 #include "av1/encoder/reconinter_enc.h"
17 
18 // return mv_diff
is_interp_filter_good_match(const INTERPOLATION_FILTER_STATS * st,MB_MODE_INFO * const mi,int skip_level)19 static INLINE int is_interp_filter_good_match(
20     const INTERPOLATION_FILTER_STATS *st, MB_MODE_INFO *const mi,
21     int skip_level) {
22   const int is_comp = has_second_ref(mi);
23   int i;
24 
25   for (i = 0; i < 1 + is_comp; ++i) {
26     if (st->ref_frames[i] != mi->ref_frame[i]) return INT_MAX;
27   }
28 
29   if (skip_level == 1 && is_comp) {
30     if (st->comp_type != mi->interinter_comp.type) return INT_MAX;
31     if (st->compound_idx != mi->compound_idx) return INT_MAX;
32   }
33 
34   int mv_diff = 0;
35   for (i = 0; i < 1 + is_comp; ++i) {
36     mv_diff += abs(st->mv[i].as_mv.row - mi->mv[i].as_mv.row) +
37                abs(st->mv[i].as_mv.col - mi->mv[i].as_mv.col);
38   }
39   return mv_diff;
40 }
41 
save_interp_filter_search_stat(MB_MODE_INFO * const mbmi,int64_t rd,unsigned int pred_sse,INTERPOLATION_FILTER_STATS * interp_filter_stats,int interp_filter_stats_idx)42 static INLINE int save_interp_filter_search_stat(
43     MB_MODE_INFO *const mbmi, int64_t rd, unsigned int pred_sse,
44     INTERPOLATION_FILTER_STATS *interp_filter_stats,
45     int interp_filter_stats_idx) {
46   if (interp_filter_stats_idx < MAX_INTERP_FILTER_STATS) {
47     INTERPOLATION_FILTER_STATS stat = { mbmi->interp_filters,
48                                         { mbmi->mv[0], mbmi->mv[1] },
49                                         { mbmi->ref_frame[0],
50                                           mbmi->ref_frame[1] },
51                                         mbmi->interinter_comp.type,
52                                         mbmi->compound_idx,
53                                         rd,
54                                         pred_sse };
55     interp_filter_stats[interp_filter_stats_idx] = stat;
56     interp_filter_stats_idx++;
57   }
58   return interp_filter_stats_idx;
59 }
60 
find_interp_filter_in_stats(MB_MODE_INFO * const mbmi,INTERPOLATION_FILTER_STATS * interp_filter_stats,int interp_filter_stats_idx,int skip_level)61 static INLINE int find_interp_filter_in_stats(
62     MB_MODE_INFO *const mbmi, INTERPOLATION_FILTER_STATS *interp_filter_stats,
63     int interp_filter_stats_idx, int skip_level) {
64   // [skip_levels][single or comp]
65   const int thr[2][2] = { { 0, 0 }, { 3, 7 } };
66   const int is_comp = has_second_ref(mbmi);
67 
68   // Find good enough match.
69   // TODO(yunqing): Separate single-ref mode and comp mode stats for fast
70   // search.
71   int best = INT_MAX;
72   int match = -1;
73   for (int j = 0; j < interp_filter_stats_idx; ++j) {
74     const INTERPOLATION_FILTER_STATS *st = &interp_filter_stats[j];
75     const int mv_diff = is_interp_filter_good_match(st, mbmi, skip_level);
76     // Exact match is found.
77     if (mv_diff == 0) {
78       match = j;
79       break;
80     } else if (mv_diff < best && mv_diff <= thr[skip_level - 1][is_comp]) {
81       best = mv_diff;
82       match = j;
83     }
84   }
85 
86   if (match != -1) {
87     mbmi->interp_filters = interp_filter_stats[match].filters;
88     return match;
89   }
90   return -1;  // no match result found
91 }
92 
// Tries to reuse a previously searched interpolation filter for `mbmi`.
// The stats table is consulted only when `need_search` is set and the
// use_interp_filter speed feature is non-zero. If a match is found its index
// is returned (mbmi->interp_filters is then set by the lookup). In every
// other case the filters are set from `assign_filter` via
// set_default_interp_filters() and -1 is returned.
int av1_find_interp_filter_match(
    MB_MODE_INFO *const mbmi, const AV1_COMP *const cpi,
    const InterpFilter assign_filter, const int need_search,
    INTERPOLATION_FILTER_STATS *interp_filter_stats,
    int interp_filter_stats_idx) {
  if (cpi->sf.interp_sf.use_interp_filter && need_search) {
    const int match_idx = find_interp_filter_in_stats(
        mbmi, interp_filter_stats, interp_filter_stats_idx,
        cpi->sf.interp_sf.use_interp_filter);
    if (match_idx != -1) return match_idx;
  }

  set_default_interp_filters(mbmi, assign_filter);
  return -1;
}
108 
swap_dst_buf(MACROBLOCKD * xd,const BUFFER_SET * dst_bufs[2],int num_planes)109 static INLINE void swap_dst_buf(MACROBLOCKD *xd, const BUFFER_SET *dst_bufs[2],
110                                 int num_planes) {
111   const BUFFER_SET *buf0 = dst_bufs[0];
112   dst_bufs[0] = dst_bufs[1];
113   dst_bufs[1] = buf0;
114   restore_dst_buf(xd, *dst_bufs[0], num_planes);
115 }
116 
get_switchable_rate(MACROBLOCK * const x,const int_interpfilters filters,const int ctx[2],int dual_filter)117 static INLINE int get_switchable_rate(MACROBLOCK *const x,
118                                       const int_interpfilters filters,
119                                       const int ctx[2], int dual_filter) {
120   const InterpFilter filter0 = filters.as_filters.y_filter;
121   int inter_filter_cost =
122       x->mode_costs.switchable_interp_costs[ctx[0]][filter0];
123   if (dual_filter) {
124     const InterpFilter filter1 = filters.as_filters.x_filter;
125     inter_filter_cost += x->mode_costs.switchable_interp_costs[ctx[1]][filter1];
126   }
127   return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
128 }
129 
130 // Build inter predictor and calculate model rd
131 // for a given plane.
interp_model_rd_eval(MACROBLOCK * const x,const AV1_COMP * const cpi,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int plane_from,int plane_to,RD_STATS * rd_stats,int is_skip_build_pred)132 static INLINE void interp_model_rd_eval(
133     MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
134     const BUFFER_SET *const orig_dst, int plane_from, int plane_to,
135     RD_STATS *rd_stats, int is_skip_build_pred) {
136   const AV1_COMMON *cm = &cpi->common;
137   MACROBLOCKD *const xd = &x->e_mbd;
138   RD_STATS tmp_rd_stats;
139   av1_init_rd_stats(&tmp_rd_stats);
140 
141   // Skip inter predictor if the predictor is already available.
142   if (!is_skip_build_pred) {
143     const int mi_row = xd->mi_row;
144     const int mi_col = xd->mi_col;
145     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
146                                   plane_from, plane_to);
147   }
148 
149   model_rd_sb_fn[cpi->sf.rt_sf.use_simple_rd_model
150                      ? MODELRD_LEGACY
151                      : MODELRD_TYPE_INTERP_FILTER](
152       cpi, bsize, x, xd, plane_from, plane_to, &tmp_rd_stats.rate,
153       &tmp_rd_stats.dist, &tmp_rd_stats.skip_txfm, &tmp_rd_stats.sse, NULL,
154       NULL, NULL);
155 
156   av1_merge_rd_stats(rd_stats, &tmp_rd_stats);
157 }
158 
159 // calculate the rdcost of given interpolation_filter
interpolation_filter_rd(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int64_t * const rd,RD_STATS * rd_stats_luma,RD_STATS * rd_stats,int * const switchable_rate,const BUFFER_SET * dst_bufs[2],int filter_idx,const int switchable_ctx[2],const int skip_pred)160 static INLINE int64_t interpolation_filter_rd(
161     MACROBLOCK *const x, const AV1_COMP *const cpi,
162     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
163     const BUFFER_SET *const orig_dst, int64_t *const rd,
164     RD_STATS *rd_stats_luma, RD_STATS *rd_stats, int *const switchable_rate,
165     const BUFFER_SET *dst_bufs[2], int filter_idx, const int switchable_ctx[2],
166     const int skip_pred) {
167   const AV1_COMMON *cm = &cpi->common;
168   const InterpSearchFlags *interp_search_flags = &cpi->interp_search_flags;
169   const int num_planes = av1_num_planes(cm);
170   MACROBLOCKD *const xd = &x->e_mbd;
171   MB_MODE_INFO *const mbmi = xd->mi[0];
172   RD_STATS this_rd_stats_luma, this_rd_stats;
173 
174   // Initialize rd_stats structures to default values.
175   av1_init_rd_stats(&this_rd_stats_luma);
176   this_rd_stats = *rd_stats_luma;
177   const int_interpfilters last_best = mbmi->interp_filters;
178   mbmi->interp_filters = filter_sets[filter_idx];
179   const int tmp_rs =
180       get_switchable_rate(x, mbmi->interp_filters, switchable_ctx,
181                           cm->seq_params->enable_dual_filter);
182 
183   int64_t min_rd = RDCOST(x->rdmult, tmp_rs, 0);
184   if (min_rd > *rd) {
185     mbmi->interp_filters = last_best;
186     return 0;
187   }
188 
189   (void)tile_data;
190 
191   assert(skip_pred != 2);
192   assert((rd_stats_luma->rate >= 0) && (rd_stats->rate >= 0));
193   assert((rd_stats_luma->dist >= 0) && (rd_stats->dist >= 0));
194   assert((rd_stats_luma->sse >= 0) && (rd_stats->sse >= 0));
195   assert((rd_stats_luma->skip_txfm == 0) || (rd_stats_luma->skip_txfm == 1));
196   assert((rd_stats->skip_txfm == 0) || (rd_stats->skip_txfm == 1));
197   assert((skip_pred >= 0) &&
198          (skip_pred <= interp_search_flags->default_interp_skip_flags));
199 
200   // When skip_txfm pred is equal to default_interp_skip_flags,
201   // skip both luma and chroma MC.
202   // For mono-chrome images:
203   // num_planes = 1 and cpi->default_interp_skip_flags = 1,
204   // skip_pred = 1: skip both luma and chroma
205   // skip_pred = 0: Evaluate luma and as num_planes=1,
206   // skip chroma evaluation
207   int tmp_skip_pred =
208       (skip_pred == interp_search_flags->default_interp_skip_flags)
209           ? INTERP_SKIP_LUMA_SKIP_CHROMA
210           : skip_pred;
211 
212   switch (tmp_skip_pred) {
213     case INTERP_EVAL_LUMA_EVAL_CHROMA:
214       // skip_pred = 0: Evaluate both luma and chroma.
215       // Luma MC
216       interp_model_rd_eval(x, cpi, bsize, orig_dst, AOM_PLANE_Y, AOM_PLANE_Y,
217                            &this_rd_stats_luma, 0);
218       this_rd_stats = this_rd_stats_luma;
219 #if CONFIG_COLLECT_RD_STATS == 3
220       RD_STATS rd_stats_y;
221       av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
222                                           INT64_MAX);
223       PrintPredictionUnitStats(cpi, tile_data, x, &rd_stats_y, bsize);
224 #endif  // CONFIG_COLLECT_RD_STATS == 3
225       AOM_FALLTHROUGH_INTENDED;
226     case INTERP_SKIP_LUMA_EVAL_CHROMA:
227       // skip_pred = 1: skip luma evaluation (retain previous best luma stats)
228       // and do chroma evaluation.
229       for (int plane = 1; plane < num_planes; ++plane) {
230         int64_t tmp_rd =
231             RDCOST(x->rdmult, tmp_rs + this_rd_stats.rate, this_rd_stats.dist);
232         if (tmp_rd >= *rd) {
233           mbmi->interp_filters = last_best;
234           return 0;
235         }
236         interp_model_rd_eval(x, cpi, bsize, orig_dst, plane, plane,
237                              &this_rd_stats, 0);
238       }
239       break;
240     case INTERP_SKIP_LUMA_SKIP_CHROMA:
241       // both luma and chroma evaluation is skipped
242       this_rd_stats = *rd_stats;
243       break;
244     case INTERP_EVAL_INVALID:
245     default: assert(0); return 0;
246   }
247   int64_t tmp_rd =
248       RDCOST(x->rdmult, tmp_rs + this_rd_stats.rate, this_rd_stats.dist);
249 
250   if (tmp_rd < *rd) {
251     *rd = tmp_rd;
252     *switchable_rate = tmp_rs;
253     if (skip_pred != interp_search_flags->default_interp_skip_flags) {
254       if (skip_pred == INTERP_EVAL_LUMA_EVAL_CHROMA) {
255         // Overwrite the data as current filter is the best one
256         *rd_stats_luma = this_rd_stats_luma;
257         *rd_stats = this_rd_stats;
258         // As luma MC data is computed, no need to recompute after the search
259         x->recalc_luma_mc_data = 0;
260       } else if (skip_pred == INTERP_SKIP_LUMA_EVAL_CHROMA) {
261         // As luma MC data is not computed, update of luma data can be skipped
262         *rd_stats = this_rd_stats;
263         // As luma MC data is not recomputed and current filter is the best,
264         // indicate the possibility of recomputing MC data
265         // If current buffer contains valid MC data, toggle to indicate that
266         // luma MC data needs to be recomputed
267         x->recalc_luma_mc_data ^= 1;
268       }
269       swap_dst_buf(xd, dst_bufs, num_planes);
270     }
271     return 1;
272   }
273   mbmi->interp_filters = last_best;
274   return 0;
275 }
276 
is_pred_filter_search_allowed(const AV1_COMP * const cpi,MACROBLOCKD * xd,BLOCK_SIZE bsize,int_interpfilters * af,int_interpfilters * lf)277 static INLINE INTERP_PRED_TYPE is_pred_filter_search_allowed(
278     const AV1_COMP *const cpi, MACROBLOCKD *xd, BLOCK_SIZE bsize,
279     int_interpfilters *af, int_interpfilters *lf) {
280   const AV1_COMMON *cm = &cpi->common;
281   const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
282   const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
283   const int bsl = mi_size_wide_log2[bsize];
284   int is_horiz_eq = 0, is_vert_eq = 0;
285 
286   if (above_mbmi && is_inter_block(above_mbmi))
287     *af = above_mbmi->interp_filters;
288 
289   if (left_mbmi && is_inter_block(left_mbmi)) *lf = left_mbmi->interp_filters;
290 
291   if (af->as_filters.x_filter != INTERP_INVALID)
292     is_horiz_eq = af->as_filters.x_filter == lf->as_filters.x_filter;
293   if (af->as_filters.y_filter != INTERP_INVALID)
294     is_vert_eq = af->as_filters.y_filter == lf->as_filters.y_filter;
295 
296   INTERP_PRED_TYPE pred_filter_type = (is_vert_eq << 1) + is_horiz_eq;
297   const int mi_row = xd->mi_row;
298   const int mi_col = xd->mi_col;
299   int pred_filter_enable =
300       cpi->sf.interp_sf.cb_pred_filter_search
301           ? (((mi_row + mi_col) >> bsl) +
302              get_chessboard_index(cm->current_frame.frame_number)) &
303                 0x1
304           : 0;
305   pred_filter_enable &= is_horiz_eq || is_vert_eq;
306   // pred_filter_search = 0: pred_filter is disabled
307   // pred_filter_search = 1: pred_filter is enabled and only horz pred matching
308   // pred_filter_search = 2: pred_filter is enabled and only vert pred matching
309   // pred_filter_search = 3: pred_filter is enabled and
310   //                         both vert, horz pred matching
311   return pred_filter_enable * pred_filter_type;
312 }
313 
find_best_interp_rd_facade(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int64_t * const rd,RD_STATS * rd_stats_y,RD_STATS * rd_stats,int * const switchable_rate,const BUFFER_SET * dst_bufs[2],const int switchable_ctx[2],const int skip_pred,uint16_t allow_interp_mask,int is_w4_or_h4)314 static DUAL_FILTER_TYPE find_best_interp_rd_facade(
315     MACROBLOCK *const x, const AV1_COMP *const cpi,
316     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
317     const BUFFER_SET *const orig_dst, int64_t *const rd, RD_STATS *rd_stats_y,
318     RD_STATS *rd_stats, int *const switchable_rate,
319     const BUFFER_SET *dst_bufs[2], const int switchable_ctx[2],
320     const int skip_pred, uint16_t allow_interp_mask, int is_w4_or_h4) {
321   int tmp_skip_pred = skip_pred;
322   DUAL_FILTER_TYPE best_filt_type = REG_REG;
323 
324   // If no filter are set to be evaluated, return from function
325   if (allow_interp_mask == 0x0) return best_filt_type;
326   // For block width or height is 4, skip the pred evaluation of SHARP_SHARP
327   tmp_skip_pred = is_w4_or_h4
328                       ? cpi->interp_search_flags.default_interp_skip_flags
329                       : skip_pred;
330 
331   // Loop over the all filter types and evaluate for only allowed filter types
332   for (int filt_type = SHARP_SHARP; filt_type >= REG_REG; --filt_type) {
333     const int is_filter_allowed =
334         get_interp_filter_allowed_mask(allow_interp_mask, filt_type);
335     if (is_filter_allowed)
336       if (interpolation_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
337                                   rd_stats_y, rd_stats, switchable_rate,
338                                   dst_bufs, filt_type, switchable_ctx,
339                                   tmp_skip_pred))
340         best_filt_type = filt_type;
341     tmp_skip_pred = skip_pred;
342   }
343   return best_filt_type;
344 }
345 
pred_dual_interp_filter_rd(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int64_t * const rd,RD_STATS * rd_stats_y,RD_STATS * rd_stats,int * const switchable_rate,const BUFFER_SET * dst_bufs[2],const int switchable_ctx[2],const int skip_pred,INTERP_PRED_TYPE pred_filt_type,int_interpfilters * af,int_interpfilters * lf)346 static INLINE void pred_dual_interp_filter_rd(
347     MACROBLOCK *const x, const AV1_COMP *const cpi,
348     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
349     const BUFFER_SET *const orig_dst, int64_t *const rd, RD_STATS *rd_stats_y,
350     RD_STATS *rd_stats, int *const switchable_rate,
351     const BUFFER_SET *dst_bufs[2], const int switchable_ctx[2],
352     const int skip_pred, INTERP_PRED_TYPE pred_filt_type, int_interpfilters *af,
353     int_interpfilters *lf) {
354   (void)lf;
355   assert(pred_filt_type > INTERP_HORZ_NEQ_VERT_NEQ);
356   assert(pred_filt_type < INTERP_PRED_TYPE_ALL);
357   uint16_t allowed_interp_mask = 0;
358 
359   if (pred_filt_type == INTERP_HORZ_EQ_VERT_NEQ) {
360     // pred_filter_search = 1: Only horizontal filter is matching
361     allowed_interp_mask =
362         av1_interp_dual_filt_mask[pred_filt_type - 1][af->as_filters.x_filter];
363   } else if (pred_filt_type == INTERP_HORZ_NEQ_VERT_EQ) {
364     // pred_filter_search = 2: Only vertical filter is matching
365     allowed_interp_mask =
366         av1_interp_dual_filt_mask[pred_filt_type - 1][af->as_filters.y_filter];
367   } else {
368     // pred_filter_search = 3: Both horizontal and vertical filter are matching
369     int filt_type =
370         af->as_filters.x_filter + af->as_filters.y_filter * SWITCHABLE_FILTERS;
371     set_interp_filter_allowed_mask(&allowed_interp_mask, filt_type);
372   }
373   // REG_REG is already been evaluated in the beginning
374   reset_interp_filter_allowed_mask(&allowed_interp_mask, REG_REG);
375   find_best_interp_rd_facade(x, cpi, tile_data, bsize, orig_dst, rd, rd_stats_y,
376                              rd_stats, switchable_rate, dst_bufs,
377                              switchable_ctx, skip_pred, allowed_interp_mask, 0);
378 }
379 // Evaluate dual filter type
380 // a) Using above, left block interp filter
381 // b) Find the best horizontal filter and
382 //    then evaluate corresponding vertical filters.
fast_dual_interp_filter_rd(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int64_t * const rd,RD_STATS * rd_stats_y,RD_STATS * rd_stats,int * const switchable_rate,const BUFFER_SET * dst_bufs[2],const int switchable_ctx[2],const int skip_hor,const int skip_ver)383 static INLINE void fast_dual_interp_filter_rd(
384     MACROBLOCK *const x, const AV1_COMP *const cpi,
385     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
386     const BUFFER_SET *const orig_dst, int64_t *const rd, RD_STATS *rd_stats_y,
387     RD_STATS *rd_stats, int *const switchable_rate,
388     const BUFFER_SET *dst_bufs[2], const int switchable_ctx[2],
389     const int skip_hor, const int skip_ver) {
390   const InterpSearchFlags *interp_search_flags = &cpi->interp_search_flags;
391   MACROBLOCKD *const xd = &x->e_mbd;
392   MB_MODE_INFO *const mbmi = xd->mi[0];
393   INTERP_PRED_TYPE pred_filter_type = INTERP_HORZ_NEQ_VERT_NEQ;
394   int_interpfilters af = av1_broadcast_interp_filter(INTERP_INVALID);
395   int_interpfilters lf = af;
396 
397   if (!have_newmv_in_inter_mode(mbmi->mode)) {
398     pred_filter_type = is_pred_filter_search_allowed(cpi, xd, bsize, &af, &lf);
399   }
400 
401   if (pred_filter_type) {
402     pred_dual_interp_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
403                                rd_stats_y, rd_stats, switchable_rate, dst_bufs,
404                                switchable_ctx, (skip_hor & skip_ver),
405                                pred_filter_type, &af, &lf);
406   } else {
407     const int bw = block_size_wide[bsize];
408     const int bh = block_size_high[bsize];
409     int best_dual_mode = 0;
410     int skip_pred =
411         bw <= 4 ? interp_search_flags->default_interp_skip_flags : skip_hor;
412     // TODO(any): Make use of find_best_interp_rd_facade()
413     // if speed impact is negligible
414     for (int i = (SWITCHABLE_FILTERS - 1); i >= 1; --i) {
415       if (interpolation_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
416                                   rd_stats_y, rd_stats, switchable_rate,
417                                   dst_bufs, i, switchable_ctx, skip_pred)) {
418         best_dual_mode = i;
419       }
420       skip_pred = skip_hor;
421     }
422     // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
423     skip_pred =
424         bh <= 4 ? interp_search_flags->default_interp_skip_flags : skip_ver;
425     for (int i = (best_dual_mode + (SWITCHABLE_FILTERS * 2));
426          i >= (best_dual_mode + SWITCHABLE_FILTERS); i -= SWITCHABLE_FILTERS) {
427       interpolation_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
428                               rd_stats_y, rd_stats, switchable_rate, dst_bufs,
429                               i, switchable_ctx, skip_pred);
430       skip_pred = skip_ver;
431     }
432   }
433 }
434 
435 // Find the best interp filter if dual_interp_filter = 0
find_best_non_dual_interp_filter(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const orig_dst,int64_t * const rd,RD_STATS * rd_stats_y,RD_STATS * rd_stats,int * const switchable_rate,const BUFFER_SET * dst_bufs[2],const int switchable_ctx[2],const int skip_ver,const int skip_hor)436 static INLINE void find_best_non_dual_interp_filter(
437     MACROBLOCK *const x, const AV1_COMP *const cpi,
438     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
439     const BUFFER_SET *const orig_dst, int64_t *const rd, RD_STATS *rd_stats_y,
440     RD_STATS *rd_stats, int *const switchable_rate,
441     const BUFFER_SET *dst_bufs[2], const int switchable_ctx[2],
442     const int skip_ver, const int skip_hor) {
443   const InterpSearchFlags *interp_search_flags = &cpi->interp_search_flags;
444   int8_t i;
445   MACROBLOCKD *const xd = &x->e_mbd;
446   MB_MODE_INFO *const mbmi = xd->mi[0];
447 
448   uint16_t interp_filter_search_mask =
449       interp_search_flags->interp_filter_search_mask;
450 
451   if (cpi->sf.interp_sf.adaptive_interp_filter_search == 2) {
452     const FRAME_UPDATE_TYPE update_type =
453         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
454     const int ctx0 = av1_get_pred_context_switchable_interp(xd, 0);
455     const int ctx1 = av1_get_pred_context_switchable_interp(xd, 1);
456     const int *switchable_interp_p0 =
457         cpi->ppi->frame_probs.switchable_interp_probs[update_type][ctx0];
458     const int *switchable_interp_p1 =
459         cpi->ppi->frame_probs.switchable_interp_probs[update_type][ctx1];
460     static const int thr[7] = { 0, 8, 8, 8, 8, 0, 8 };
461     const int thresh = thr[update_type];
462     for (i = 0; i < SWITCHABLE_FILTERS; i++) {
463       // For non-dual case, the 2 dir's prob should be identical.
464       assert(switchable_interp_p0[i] == switchable_interp_p1[i]);
465       if (switchable_interp_p0[i] < thresh &&
466           switchable_interp_p1[i] < thresh) {
467         DUAL_FILTER_TYPE filt_type = i + SWITCHABLE_FILTERS * i;
468         reset_interp_filter_allowed_mask(&interp_filter_search_mask, filt_type);
469       }
470     }
471   }
472 
473   // Regular filter evaluation should have been done and hence the same should
474   // be the winner
475   assert(x->e_mbd.mi[0]->interp_filters.as_int == filter_sets[0].as_int);
476   if ((skip_hor & skip_ver) != interp_search_flags->default_interp_skip_flags) {
477     INTERP_PRED_TYPE pred_filter_type = INTERP_HORZ_NEQ_VERT_NEQ;
478     int_interpfilters af = av1_broadcast_interp_filter(INTERP_INVALID);
479     int_interpfilters lf = af;
480 
481     pred_filter_type = is_pred_filter_search_allowed(cpi, xd, bsize, &af, &lf);
482     if (pred_filter_type) {
483       assert(af.as_filters.x_filter != INTERP_INVALID);
484       int filter_idx = SWITCHABLE * af.as_filters.x_filter;
485       // This assert tells that (filter_x == filter_y) for non-dual filter case
486       assert(filter_sets[filter_idx].as_filters.x_filter ==
487              filter_sets[filter_idx].as_filters.y_filter);
488       if (cpi->sf.interp_sf.adaptive_interp_filter_search &&
489           !(get_interp_filter_allowed_mask(interp_filter_search_mask,
490                                            filter_idx))) {
491         return;
492       }
493       if (filter_idx) {
494         interpolation_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
495                                 rd_stats_y, rd_stats, switchable_rate, dst_bufs,
496                                 filter_idx, switchable_ctx,
497                                 (skip_hor & skip_ver));
498       }
499       return;
500     }
501   }
502   // Reuse regular filter's modeled rd data for sharp filter for following
503   // cases
504   // 1) When bsize is 4x4
505   // 2) When block width is 4 (i.e. 4x8/4x16 blocks) and MV in vertical
506   // direction is full-pel
507   // 3) When block height is 4 (i.e. 8x4/16x4 blocks) and MV in horizontal
508   // direction is full-pel
509   // TODO(any): Optimize cases 2 and 3 further if luma MV in relavant direction
510   // alone is full-pel
511 
512   if ((bsize == BLOCK_4X4) ||
513       (block_size_wide[bsize] == 4 &&
514        skip_ver == interp_search_flags->default_interp_skip_flags) ||
515       (block_size_high[bsize] == 4 &&
516        skip_hor == interp_search_flags->default_interp_skip_flags)) {
517     int skip_pred = skip_hor & skip_ver;
518     uint16_t allowed_interp_mask = 0;
519 
520     // REG_REG filter type is evaluated beforehand, hence skip it
521     set_interp_filter_allowed_mask(&allowed_interp_mask, SHARP_SHARP);
522     set_interp_filter_allowed_mask(&allowed_interp_mask, SMOOTH_SMOOTH);
523     if (cpi->sf.interp_sf.adaptive_interp_filter_search)
524       allowed_interp_mask &= interp_filter_search_mask;
525 
526     find_best_interp_rd_facade(x, cpi, tile_data, bsize, orig_dst, rd,
527                                rd_stats_y, rd_stats, switchable_rate, dst_bufs,
528                                switchable_ctx, skip_pred, allowed_interp_mask,
529                                1);
530   } else {
531     int skip_pred = (skip_hor & skip_ver);
532     for (i = (SWITCHABLE_FILTERS + 1); i < DUAL_FILTER_SET_SIZE;
533          i += (SWITCHABLE_FILTERS + 1)) {
534       // This assert tells that (filter_x == filter_y) for non-dual filter case
535       assert(filter_sets[i].as_filters.x_filter ==
536              filter_sets[i].as_filters.y_filter);
537       if (cpi->sf.interp_sf.adaptive_interp_filter_search &&
538           !(get_interp_filter_allowed_mask(interp_filter_search_mask, i))) {
539         continue;
540       }
541       interpolation_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
542                               rd_stats_y, rd_stats, switchable_rate, dst_bufs,
543                               i, switchable_ctx, skip_pred);
544       // In first iteration, smooth filter is evaluated. If smooth filter
545       // (which is less sharper) is the winner among regular and smooth filters,
546       // sharp filter evaluation is skipped
547       // TODO(any): Refine this gating based on modelled rd only (i.e., by not
548       // accounting switchable filter rate)
549       if (cpi->sf.interp_sf.skip_sharp_interp_filter_search &&
550           skip_pred != interp_search_flags->default_interp_skip_flags) {
551         if (mbmi->interp_filters.as_int == filter_sets[SMOOTH_SMOOTH].as_int)
552           break;
553       }
554     }
555   }
556 }
557 
calc_interp_skip_pred_flag(MACROBLOCK * const x,const AV1_COMP * const cpi,int * skip_hor,int * skip_ver)558 static INLINE void calc_interp_skip_pred_flag(MACROBLOCK *const x,
559                                               const AV1_COMP *const cpi,
560                                               int *skip_hor, int *skip_ver) {
561   const AV1_COMMON *cm = &cpi->common;
562   MACROBLOCKD *const xd = &x->e_mbd;
563   MB_MODE_INFO *const mbmi = xd->mi[0];
564   const int num_planes = av1_num_planes(cm);
565   const int is_compound = has_second_ref(mbmi);
566   assert(is_intrabc_block(mbmi) == 0);
567   for (int ref = 0; ref < 1 + is_compound; ++ref) {
568     const struct scale_factors *const sf =
569         get_ref_scale_factors_const(cm, mbmi->ref_frame[ref]);
570     // TODO(any): Refine skip flag calculation considering scaling
571     if (av1_is_scaled(sf)) {
572       *skip_hor = 0;
573       *skip_ver = 0;
574       break;
575     }
576     const MV mv = mbmi->mv[ref].as_mv;
577     int skip_hor_plane = 0;
578     int skip_ver_plane = 0;
579     for (int plane_idx = 0; plane_idx < AOMMAX(1, (num_planes - 1));
580          ++plane_idx) {
581       struct macroblockd_plane *const pd = &xd->plane[plane_idx];
582       const int bw = pd->width;
583       const int bh = pd->height;
584       const MV mv_q4 = clamp_mv_to_umv_border_sb(
585           xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
586       const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
587       const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
588       skip_hor_plane |= ((sub_x == 0) << plane_idx);
589       skip_ver_plane |= ((sub_y == 0) << plane_idx);
590     }
591     *skip_hor &= skip_hor_plane;
592     *skip_ver &= skip_ver_plane;
593     // It is not valid that "luma MV is sub-pel, whereas chroma MV is not"
594     assert(*skip_hor != 2);
595     assert(*skip_ver != 2);
596   }
597   // When compond prediction type is compound segment wedge, luma MC and chroma
598   // MC need to go hand in hand as mask generated during luma MC is reuired for
599   // chroma MC. If skip_hor = 0 and skip_ver = 1, mask used for chroma MC during
600   // vertical filter decision may be incorrect as temporary MC evaluation
601   // overwrites the mask. Make skip_ver as 0 for this case so that mask is
602   // populated during luma MC
603   if (is_compound && mbmi->compound_idx == 1 &&
604       mbmi->interinter_comp.type == COMPOUND_DIFFWTD) {
605     assert(mbmi->comp_group_idx == 1);
606     if (*skip_hor == 0 && *skip_ver == 1) *skip_ver = 0;
607   }
608 }
609 
610 /*!\brief AV1 interpolation filter search
611  *
612  * \ingroup inter_mode_search
613  *
614  * \param[in]     cpi               Top-level encoder structure.
615  * \param[in]     tile_data         Pointer to struct holding adaptive
616  *                                  data/contexts/models for the tile during
617  *                                  encoding.
618  * \param[in]     x                 Pointer to struct holding all the data for
619  *                                  the current macroblock.
620  * \param[in]     bsize             Current block size.
621  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
622  *                                  computed prediction.
623  * \param[in,out] orig_dst          A prediction buffer to hold a computed
624  *                                  prediction. This will eventually hold the
625  *                                  final prediction, and the tmp_dst info will
626  *                                  be copied here.
627  * \param[in,out] rd                The RD cost associated with the selected
628  *                                  interpolation filter parameters.
629  * \param[in,out] switchable_rate   The rate associated with using a SWITCHABLE
630  *                                  filter mode.
631  * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
632  *                                  predictor. If this is 0, the inter predictor
633  *                                  has already been built and thus we can avoid
634  *                                  repeating computation.
635  * \param[in]     args              HandleInterModeArgs struct holding
636  *                                  miscellaneous arguments for inter mode
637  *                                  search. See the documentation for this
638  *                                  struct for a description of each member.
639  * \param[in]     ref_best_rd       Best RD found so far for this block.
640  *                                  It is used for early termination of this
641  *                                  search if the RD exceeds this value.
642  *
643  * \return Returns INT64_MAX if the filter parameters are invalid and the
644  * current motion mode being tested should be skipped. It returns 0 if the
645  * parameter search is a success.
646  */
av1_interpolation_filter_search(MACROBLOCK * const x,const AV1_COMP * const cpi,const TileDataEnc * tile_data,BLOCK_SIZE bsize,const BUFFER_SET * const tmp_dst,const BUFFER_SET * const orig_dst,int64_t * const rd,int * const switchable_rate,int * skip_build_pred,HandleInterModeArgs * args,int64_t ref_best_rd)647 int64_t av1_interpolation_filter_search(
648     MACROBLOCK *const x, const AV1_COMP *const cpi,
649     const TileDataEnc *tile_data, BLOCK_SIZE bsize,
650     const BUFFER_SET *const tmp_dst, const BUFFER_SET *const orig_dst,
651     int64_t *const rd, int *const switchable_rate, int *skip_build_pred,
652     HandleInterModeArgs *args, int64_t ref_best_rd) {
653   const AV1_COMMON *cm = &cpi->common;
654   const InterpSearchFlags *interp_search_flags = &cpi->interp_search_flags;
655   const int num_planes = av1_num_planes(cm);
656   MACROBLOCKD *const xd = &x->e_mbd;
657   MB_MODE_INFO *const mbmi = xd->mi[0];
658   const int need_search =
659       av1_is_interp_needed(xd) && !cpi->sf.rt_sf.skip_interp_filter_search;
660   const int ref_frame = xd->mi[0]->ref_frame[0];
661   RD_STATS rd_stats_luma, rd_stats;
662 
663   // Initialization of rd_stats structures with default values
664   av1_init_rd_stats(&rd_stats_luma);
665   av1_init_rd_stats(&rd_stats);
666 
667   int match_found_idx = -1;
668   const InterpFilter assign_filter = cm->features.interp_filter;
669 
670   match_found_idx = av1_find_interp_filter_match(
671       mbmi, cpi, assign_filter, need_search, args->interp_filter_stats,
672       args->interp_filter_stats_idx);
673 
674   if (match_found_idx != -1) {
675     *rd = args->interp_filter_stats[match_found_idx].rd;
676     x->pred_sse[ref_frame] =
677         args->interp_filter_stats[match_found_idx].pred_sse;
678     return 0;
679   }
680 
681   int switchable_ctx[2];
682   switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0);
683   switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
684   *switchable_rate =
685       get_switchable_rate(x, mbmi->interp_filters, switchable_ctx,
686                           cm->seq_params->enable_dual_filter);
687 
688   // Do MC evaluation for default filter_type.
689   // Luma MC
690   interp_model_rd_eval(x, cpi, bsize, orig_dst, AOM_PLANE_Y, AOM_PLANE_Y,
691                        &rd_stats_luma, *skip_build_pred);
692 
693 #if CONFIG_COLLECT_RD_STATS == 3
694   RD_STATS rd_stats_y;
695   av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
696   PrintPredictionUnitStats(cpi, tile_data, x, &rd_stats_y, bsize);
697 #endif  // CONFIG_COLLECT_RD_STATS == 3
698   // Chroma MC
699   if (num_planes > 1) {
700     interp_model_rd_eval(x, cpi, bsize, orig_dst, AOM_PLANE_U, AOM_PLANE_V,
701                          &rd_stats, *skip_build_pred);
702   }
703   *skip_build_pred = 1;
704 
705   av1_merge_rd_stats(&rd_stats, &rd_stats_luma);
706 
707   assert(rd_stats.rate >= 0);
708 
709   *rd = RDCOST(x->rdmult, *switchable_rate + rd_stats.rate, rd_stats.dist);
710   x->pred_sse[ref_frame] = (unsigned int)(rd_stats_luma.sse >> 4);
711 
712   if (assign_filter != SWITCHABLE || match_found_idx != -1) {
713     return 0;
714   }
715   if (!need_search) {
716     int_interpfilters filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
717     assert(mbmi->interp_filters.as_int == filters.as_int);
718     (void)filters;
719     return 0;
720   }
721   if (args->modelled_rd != NULL) {
722     if (has_second_ref(mbmi)) {
723       const int ref_mv_idx = mbmi->ref_mv_idx;
724       MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
725       const int mode0 = compound_ref0_mode(mbmi->mode);
726       const int mode1 = compound_ref1_mode(mbmi->mode);
727       const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
728                                  args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
729       if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) {
730         return INT64_MAX;
731       }
732     }
733   }
734 
735   x->recalc_luma_mc_data = 0;
736   // skip_flag=xx (in binary form)
737   // Setting 0th flag corresonds to skipping luma MC and setting 1st bt
738   // corresponds to skipping chroma MC  skip_flag=0 corresponds to "Don't skip
739   // luma and chroma MC"  Skip flag=1 corresponds to "Skip Luma MC only"
740   // Skip_flag=2 is not a valid case
741   // skip_flag=3 corresponds to "Skip both luma and chroma MC"
742   int skip_hor = interp_search_flags->default_interp_skip_flags;
743   int skip_ver = interp_search_flags->default_interp_skip_flags;
744   calc_interp_skip_pred_flag(x, cpi, &skip_hor, &skip_ver);
745 
746   // do interp_filter search
747   restore_dst_buf(xd, *tmp_dst, num_planes);
748   const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst };
749   // Evaluate dual interp filters
750   if (cm->seq_params->enable_dual_filter) {
751     if (cpi->sf.interp_sf.use_fast_interpolation_filter_search) {
752       fast_dual_interp_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
753                                  &rd_stats_luma, &rd_stats, switchable_rate,
754                                  dst_bufs, switchable_ctx, skip_hor, skip_ver);
755     } else {
756       // Use full interpolation filter search
757       uint16_t allowed_interp_mask = ALLOW_ALL_INTERP_FILT_MASK;
758       // REG_REG filter type is evaluated beforehand, so loop is repeated over
759       // REG_SMOOTH to SHARP_SHARP for full interpolation filter search
760       reset_interp_filter_allowed_mask(&allowed_interp_mask, REG_REG);
761       find_best_interp_rd_facade(x, cpi, tile_data, bsize, orig_dst, rd,
762                                  &rd_stats_luma, &rd_stats, switchable_rate,
763                                  dst_bufs, switchable_ctx,
764                                  (skip_hor & skip_ver), allowed_interp_mask, 0);
765     }
766   } else {
767     // Evaluate non-dual interp filters
768     find_best_non_dual_interp_filter(
769         x, cpi, tile_data, bsize, orig_dst, rd, &rd_stats_luma, &rd_stats,
770         switchable_rate, dst_bufs, switchable_ctx, skip_ver, skip_hor);
771   }
772   swap_dst_buf(xd, dst_bufs, num_planes);
773   // Recompute final MC data if required
774   if (x->recalc_luma_mc_data == 1) {
775     // Recomputing final luma MC data is required only if the same was skipped
776     // in either of the directions  Condition below is necessary, but not
777     // sufficient
778     assert((skip_hor == 1) || (skip_ver == 1));
779     const int mi_row = xd->mi_row;
780     const int mi_col = xd->mi_col;
781     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
782                                   AOM_PLANE_Y, AOM_PLANE_Y);
783   }
784   x->pred_sse[ref_frame] = (unsigned int)(rd_stats_luma.sse >> 4);
785 
786   // save search results
787   if (cpi->sf.interp_sf.use_interp_filter) {
788     assert(match_found_idx == -1);
789     args->interp_filter_stats_idx = save_interp_filter_search_stat(
790         mbmi, *rd, x->pred_sse[ref_frame], args->interp_filter_stats,
791         args->interp_filter_stats_idx);
792   }
793   return 0;
794 }
795