• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <float.h>
13 #include <math.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_scale_rtcd.h"
18 
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_dsp/mathutils.h"
21 #include "aom_dsp/odintrin.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "av1/common/alloccommon.h"
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/quant_common.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/encoder/av1_quantize.h"
30 #include "av1/encoder/encodeframe.h"
31 #include "av1/encoder/encoder.h"
32 #include "av1/encoder/ethread.h"
33 #include "av1/encoder/extend.h"
34 #include "av1/encoder/firstpass.h"
35 #include "av1/encoder/gop_structure.h"
36 #include "av1/encoder/intra_mode_search_utils.h"
37 #include "av1/encoder/mcomp.h"
38 #include "av1/encoder/motion_search_facade.h"
39 #include "av1/encoder/pass2_strategy.h"
40 #include "av1/encoder/ratectrl.h"
41 #include "av1/encoder/reconinter_enc.h"
42 #include "av1/encoder/segmentation.h"
43 #include "av1/encoder/temporal_filter.h"
44 
45 /*!\cond */
46 
47 // NOTE: All `tf` in this file means `temporal filtering`.
48 
49 // Forward Declaration.
50 static void tf_determine_block_partition(const MV block_mv, const int block_mse,
51                                          MV *subblock_mvs, int *subblock_mses);
52 
53 // This function returns the minimum and maximum log variances for 4x4 sub
54 // blocks in the current block.
get_log_var_4x4sub_blk(AV1_COMP * cpi,const YV12_BUFFER_CONFIG * const frame_to_filter,int mb_row,int mb_col,BLOCK_SIZE block_size,double * blk_4x4_var_min,double * blk_4x4_var_max,int is_hbd)55 static inline void get_log_var_4x4sub_blk(
56     AV1_COMP *cpi, const YV12_BUFFER_CONFIG *const frame_to_filter, int mb_row,
57     int mb_col, BLOCK_SIZE block_size, double *blk_4x4_var_min,
58     double *blk_4x4_var_max, int is_hbd) {
59   const int mb_height = block_size_high[block_size];
60   const int mb_width = block_size_wide[block_size];
61   int var_min = INT_MAX;
62   int var_max = 0;
63 
64   // Derive the source buffer.
65   const int src_stride = frame_to_filter->y_stride;
66   const int y_offset = mb_row * mb_height * src_stride + mb_col * mb_width;
67   const uint8_t *src_buf = frame_to_filter->y_buffer + y_offset;
68 
69   aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf;
70   for (int i = 0; i < mb_height; i += MI_SIZE) {
71     for (int j = 0; j < mb_width; j += MI_SIZE) {
72       // Calculate the 4x4 sub-block variance.
73       const int var = av1_calc_normalized_variance(
74           vf, src_buf + (i * src_stride) + j, src_stride, is_hbd);
75 
76       // Record min and max for over-arching block
77       var_min = AOMMIN(var_min, var);
78       var_max = AOMMAX(var_max, var);
79     }
80   }
81 
82   *blk_4x4_var_min = log1p(var_min / 16.0);
83   *blk_4x4_var_max = log1p(var_max / 16.0);
84 }
85 
86 // Helper function to get `q` used for encoding.
get_q(const AV1_COMP * cpi)87 static int get_q(const AV1_COMP *cpi) {
88   const GF_GROUP *gf_group = &cpi->ppi->gf_group;
89   const FRAME_TYPE frame_type = gf_group->frame_type[cpi->gf_frame_index];
90   const int q =
91       (int)av1_convert_qindex_to_q(cpi->ppi->p_rc.avg_frame_qindex[frame_type],
92                                    cpi->common.seq_params->bit_depth);
93   return q;
94 }
95 
96 /*!\endcond */
97 /*!\brief Does motion search for blocks in temporal filtering. This is
98  *  the first step for temporal filtering. More specifically, given a frame to
99  * be filtered and another frame as reference, this function searches the
100  * reference frame to find out the most similar block as that from the frame
101  * to be filtered. This found block will be further used for weighted
102  * averaging.
103  *
104  * NOTE: Besides doing motion search for the entire block, this function will
105  *       also do motion search for each 1/4 sub-block to get more precise
106  *       predictions. Then, this function will determines whether to use 4
107  *       sub-blocks to replace the entire block. If we do need to split the
108  *       entire block, 4 elements in `subblock_mvs` and `subblock_mses` refer to
109  *       the searched motion vector and search error (MSE) w.r.t. each sub-block
110  *       respectively. Otherwise, the 4 elements will be the same, all of which
111  *       are assigned as the searched motion vector and search error (MSE) for
112  *       the entire block.
113  *
114  * \ingroup src_frame_proc
115  * \param[in]   cpi                   Top level encoder instance structure
116  * \param[in]   mb                    Pointer to macroblock
117  * \param[in]   frame_to_filter       Pointer to the frame to be filtered
118  * \param[in]   ref_frame             Pointer to the reference frame
119  * \param[in]   block_size            Block size used for motion search
120  * \param[in]   mb_row                Row index of the block in the frame
121  * \param[in]   mb_col                Column index of the block in the frame
122  * \param[in]   ref_mv                Reference motion vector, which is commonly
123  *                                    inherited from the motion search result of
124  *                                    previous frame.
125  * \param[in]   allow_me_for_sub_blks Flag to indicate whether motion search at
126  *                                    16x16 sub-block level is needed or not.
127  * \param[out]  subblock_mvs          Pointer to the motion vectors for
128  *                                    4 sub-blocks
129  * \param[out]  subblock_mses         Pointer to the search errors (MSE) for
130  *                                    4 sub-blocks
131  * \param[out]  is_dc_diff_large      Pointer to the value that tells if the DC
132  *                                    difference is large for the block
133  *
134  * \remark Nothing will be returned. Results are saved in subblock_mvs and
135  *         subblock_mses
136  */
tf_motion_search(AV1_COMP * cpi,MACROBLOCK * mb,const YV12_BUFFER_CONFIG * frame_to_filter,const YV12_BUFFER_CONFIG * ref_frame,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,MV * ref_mv,bool allow_me_for_sub_blks,MV * subblock_mvs,int * subblock_mses,int * is_dc_diff_large)137 static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
138                              const YV12_BUFFER_CONFIG *frame_to_filter,
139                              const YV12_BUFFER_CONFIG *ref_frame,
140                              const BLOCK_SIZE block_size, const int mb_row,
141                              const int mb_col, MV *ref_mv,
142                              bool allow_me_for_sub_blks, MV *subblock_mvs,
143                              int *subblock_mses, int *is_dc_diff_large) {
144   // Frame information
145   const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);
146 
147   // Block information (ONLY Y-plane is used for motion search).
148   const int mb_height = block_size_high[block_size];
149   const int mb_width = block_size_wide[block_size];
150   const int mb_pels = mb_height * mb_width;
151   const int y_stride = frame_to_filter->y_stride;
152   const int src_width = frame_to_filter->y_width;
153   const int ref_width = ref_frame->y_width;
154   assert(y_stride == ref_frame->y_stride);
155   assert(src_width == ref_width);
156   const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;
157 
158   // Save input state.
159   MACROBLOCKD *const mbd = &mb->e_mbd;
160   const struct buf_2d ori_src_buf = mb->plane[0].src;
161   const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];
162 
163   // Parameters used for motion search.
164   FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
165   SUBPEL_MOTION_SEARCH_PARAMS ms_params;
166   const int step_param = av1_init_search_range(
167       AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
168   const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
169   const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
170   const MV_COST_TYPE mv_cost_type =
171       min_frame_size >= 720
172           ? MV_COST_L1_HDRES
173           : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);
174 
175   // Starting position for motion search.
176   FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
177   // Baseline position for motion search (used for rate distortion comparison).
178   const MV baseline_mv = kZeroMv;
179 
180   // Setup.
181   mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
182   mb->plane[0].src.stride = y_stride;
183   mb->plane[0].src.width = src_width;
184   mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
185   mbd->plane[0].pre[0].stride = y_stride;
186   mbd->plane[0].pre[0].width = ref_width;
187   *is_dc_diff_large = 0;
188 
189   const SEARCH_METHODS search_method = NSTEP;
190   const search_site_config *search_site_cfg =
191       av1_get_search_site_config(cpi, mb, search_method);
192 
193   // Unused intermediate results for motion search.
194   unsigned int sse, error;
195   int distortion;
196   int cost_list[5];
197 
198   // Do motion search.
199   int_mv best_mv;  // Searched motion vector.
200   FULLPEL_MV_STATS best_mv_stats;
201   int block_mse = INT_MAX;
202   MV block_mv = kZeroMv;
203   const int q = get_q(cpi);
204 
205   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
206                                      &baseline_mv, start_mv, search_site_cfg,
207                                      search_method,
208                                      /*fine_search_interval=*/0);
209   full_ms_params.run_mesh_search = 1;
210   full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
211 
212   if (cpi->sf.mv_sf.prune_mesh_search == PRUNE_MESH_SEARCH_LVL_1) {
213     // Enable prune_mesh_search based on q for PRUNE_MESH_SEARCH_LVL_1.
214     full_ms_params.prune_mesh_search = (q <= 20) ? 0 : 1;
215     full_ms_params.mesh_search_mv_diff_threshold = 2;
216   }
217 
218   av1_full_pixel_search(start_mv, &full_ms_params, step_param,
219                         cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
220                         &best_mv_stats, NULL);
221 
222   if (force_integer_mv == 1) {  // Only do full search on the entire block.
223     const int mv_row = best_mv.as_mv.row;
224     const int mv_col = best_mv.as_mv.col;
225     best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
226     best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
227     const int mv_offset = mv_row * y_stride + mv_col;
228     error = cpi->ppi->fn_ptr[block_size].vf(
229         ref_frame->y_buffer + y_offset + mv_offset, y_stride,
230         frame_to_filter->y_buffer + y_offset, y_stride, &sse);
231     block_mse = DIVIDE_AND_ROUND(error, mb_pels);
232     block_mv = best_mv.as_mv;
233   } else {  // Do fractional search on the entire block and all sub-blocks.
234     av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
235                                       &baseline_mv, cost_list);
236     ms_params.forced_stop = EIGHTH_PEL;
237     ms_params.var_params.subpel_search_type = subpel_search_type;
238     // Since we are merely refining the result from full pixel search, we don't
239     // need regularization for subpel search
240     ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
241     best_mv_stats.err_cost = 0;
242 
243     MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
244     assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv));
245     error = cpi->mv_search_params.find_fractional_mv_step(
246         &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv_stats,
247         &best_mv.as_mv, &distortion, &sse, NULL);
248     block_mse = DIVIDE_AND_ROUND(error, mb_pels);
249     block_mv = best_mv.as_mv;
250     *ref_mv = best_mv.as_mv;
251     *is_dc_diff_large = 50 * error < sse;
252 
253     if (allow_me_for_sub_blks) {
254       // On 4 sub-blocks.
255       const BLOCK_SIZE subblock_size = av1_ss_size_lookup[block_size][1][1];
256       const int subblock_height = block_size_high[subblock_size];
257       const int subblock_width = block_size_wide[subblock_size];
258       const int subblock_pels = subblock_height * subblock_width;
259       start_mv = get_fullmv_from_mv(ref_mv);
260 
261       int subblock_idx = 0;
262       for (int i = 0; i < mb_height; i += subblock_height) {
263         for (int j = 0; j < mb_width; j += subblock_width) {
264           const int offset = i * y_stride + j;
265           mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
266           mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
267           av1_make_default_fullpel_ms_params(
268               &full_ms_params, cpi, mb, subblock_size, &baseline_mv, start_mv,
269               search_site_cfg, search_method,
270               /*fine_search_interval=*/0);
271           full_ms_params.run_mesh_search = 1;
272           full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
273 
274           if (cpi->sf.mv_sf.prune_mesh_search == PRUNE_MESH_SEARCH_LVL_1) {
275             // Enable prune_mesh_search based on q for PRUNE_MESH_SEARCH_LVL_1.
276             full_ms_params.prune_mesh_search = (q <= 20) ? 0 : 1;
277             full_ms_params.mesh_search_mv_diff_threshold = 2;
278           }
279           av1_full_pixel_search(start_mv, &full_ms_params, step_param,
280                                 cond_cost_list(cpi, cost_list),
281                                 &best_mv.as_fullmv, &best_mv_stats, NULL);
282 
283           av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
284                                             &baseline_mv, cost_list);
285           ms_params.forced_stop = EIGHTH_PEL;
286           ms_params.var_params.subpel_search_type = subpel_search_type;
287           // Since we are merely refining the result from full pixel search, we
288           // don't need regularization for subpel search
289           ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
290           best_mv_stats.err_cost = 0;
291 
292           subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
293           assert(
294               av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv));
295           error = cpi->mv_search_params.find_fractional_mv_step(
296               &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
297               &best_mv_stats, &best_mv.as_mv, &distortion, &sse, NULL);
298           subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
299           subblock_mvs[subblock_idx] = best_mv.as_mv;
300           ++subblock_idx;
301         }
302       }
303     }
304   }
305 
306   // Restore input state.
307   mb->plane[0].src = ori_src_buf;
308   mbd->plane[0].pre[0] = ori_pre_buf;
309 
310   // Make partition decision.
311   if (allow_me_for_sub_blks) {
312     tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
313                                  subblock_mses);
314   } else {
315     // Copy 32X32 block mv and mse values to sub blocks
316     for (int i = 0; i < 4; ++i) {
317       subblock_mvs[i] = block_mv;
318       subblock_mses[i] = block_mse;
319     }
320   }
321   // Do not pass down the reference motion vector if error is too large.
322   const int thresh = (min_frame_size >= 720) ? 12 : 3;
323   if (block_mse > (thresh << (mbd->bd - 8))) {
324     *ref_mv = kZeroMv;
325   }
326 }
327 /*!\cond */
328 
329 // Determines whether to split the entire block to 4 sub-blocks for filtering.
330 // In particular, this decision is made based on the comparison between the
331 // motion search error of the entire block and the errors of all sub-blocks.
332 // Inputs:
333 //   block_mv: Motion vector for the entire block (ONLY as reference).
334 //   block_mse: Motion search error (MSE) for the entire block (ONLY as
335 //              reference).
336 //   subblock_mvs: Pointer to the motion vectors for 4 sub-blocks (will be
337 //                 modified based on the partition decision).
338 //   subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks (will
339 //                  be modified based on the partition decision).
340 // Returns:
341 //   Nothing will be returned. Results are saved in `subblock_mvs` and
342 //   `subblock_mses`.
tf_determine_block_partition(const MV block_mv,const int block_mse,MV * subblock_mvs,int * subblock_mses)343 static void tf_determine_block_partition(const MV block_mv, const int block_mse,
344                                          MV *subblock_mvs, int *subblock_mses) {
345   int min_subblock_mse = INT_MAX;
346   int max_subblock_mse = INT_MIN;
347   int64_t sum_subblock_mse = 0;
348   for (int i = 0; i < 4; ++i) {
349     sum_subblock_mse += subblock_mses[i];
350     min_subblock_mse = AOMMIN(min_subblock_mse, subblock_mses[i]);
351     max_subblock_mse = AOMMAX(max_subblock_mse, subblock_mses[i]);
352   }
353 
354   // TODO(any): The following magic numbers may be tuned to improve the
355   // performance OR find a way to get rid of these magic numbers.
356   if (((block_mse * 15 < sum_subblock_mse * 4) &&
357        max_subblock_mse - min_subblock_mse < 48) ||
358       ((block_mse * 14 < sum_subblock_mse * 4) &&
359        max_subblock_mse - min_subblock_mse < 24)) {  // No split.
360     for (int i = 0; i < 4; ++i) {
361       subblock_mvs[i] = block_mv;
362       subblock_mses[i] = block_mse;
363     }
364   }
365 }
366 
367 // Helper function to determine whether a frame is encoded with high bit-depth.
is_frame_high_bitdepth(const YV12_BUFFER_CONFIG * frame)368 static inline int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
369   return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
370 }
371 
372 /*!\endcond */
373 /*!\brief Builds predictor for blocks in temporal filtering. This is the
374  * second step for temporal filtering, which is to construct predictions from
375  * all reference frames INCLUDING the frame to be filtered itself. These
376  * predictors are built based on the motion search results (motion vector is
377  * set as 0 for the frame to be filtered), and will be futher used for
378  * weighted averaging.
379  *
380  * \ingroup src_frame_proc
381  * \param[in]   ref_frame      Pointer to the reference frame (or the frame
382  *                             to be filtered)
383  * \param[in]   mbd            Pointer to the block for filtering. Besides
384  *                             containing the subsampling information of all
385  *                             planes, this field also gives the searched
386  *                             motion vector for the entire block, i.e.,
387  *                             `mbd->mi[0]->mv[0]`. This vector  should be 0
388  *                             if the `ref_frame` itself is the frame to be
389  *                             filtered.
390  * \param[in]   block_size     Size of the block
391  * \param[in]   mb_row         Row index of the block in the frame
392  * \param[in]   mb_col         Column index of the block in the frame
393  * \param[in]   num_planes     Number of planes in the frame
394  * \param[in]   scale          Scaling factor
395  * \param[in]   subblock_mvs   The motion vectors for each sub-block (row-major
396  *                             order)
397  * \param[out]  pred           Pointer to the predictor to be built
398  *
399  * \remark Nothing returned, But the contents of `pred` will be modified
400  */
tf_build_predictor(const YV12_BUFFER_CONFIG * ref_frame,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const struct scale_factors * scale,const MV * subblock_mvs,uint8_t * pred)401 static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
402                                const MACROBLOCKD *mbd,
403                                const BLOCK_SIZE block_size, const int mb_row,
404                                const int mb_col, const int num_planes,
405                                const struct scale_factors *scale,
406                                const MV *subblock_mvs, uint8_t *pred) {
407   // Information of the entire block.
408   const int mb_height = block_size_high[block_size];  // Height.
409   const int mb_width = block_size_wide[block_size];   // Width.
410   const int mb_y = mb_height * mb_row;                // Y-coord (Top-left).
411   const int mb_x = mb_width * mb_col;                 // X-coord (Top-left).
412   const int bit_depth = mbd->bd;                      // Bit depth.
413   const int is_intrabc = 0;                           // Is intra-copied?
414   const int is_high_bitdepth = is_frame_high_bitdepth(ref_frame);
415 
416   // Default interpolation filters.
417   const int_interpfilters interp_filters =
418       av1_broadcast_interp_filter(MULTITAP_SHARP2);
419 
420   // Handle Y-plane, U-plane and V-plane (if needed) in sequence.
421   int plane_offset = 0;
422   for (int plane = 0; plane < num_planes; ++plane) {
423     const int subsampling_y = mbd->plane[plane].subsampling_y;
424     const int subsampling_x = mbd->plane[plane].subsampling_x;
425     // Information of each sub-block in current plane.
426     const int plane_h = mb_height >> subsampling_y;  // Plane height.
427     const int plane_w = mb_width >> subsampling_x;   // Plane width.
428     const int plane_y = mb_y >> subsampling_y;       // Y-coord (Top-left).
429     const int plane_x = mb_x >> subsampling_x;       // X-coord (Top-left).
430     const int h = plane_h >> 1;                      // Sub-block height.
431     const int w = plane_w >> 1;                      // Sub-block width.
432     const int is_y_plane = (plane == 0);             // Is Y-plane?
433 
434     const struct buf_2d ref_buf = { NULL, ref_frame->buffers[plane],
435                                     ref_frame->widths[is_y_plane ? 0 : 1],
436                                     ref_frame->heights[is_y_plane ? 0 : 1],
437                                     ref_frame->strides[is_y_plane ? 0 : 1] };
438 
439     // Handle each subblock.
440     int subblock_idx = 0;
441     for (int i = 0; i < plane_h; i += h) {
442       for (int j = 0; j < plane_w; j += w) {
443         // Choose proper motion vector.
444         const MV mv = subblock_mvs[subblock_idx++];
445         assert(mv.row >= INT16_MIN && mv.row <= INT16_MAX &&
446                mv.col >= INT16_MIN && mv.col <= INT16_MAX);
447 
448         const int y = plane_y + i;
449         const int x = plane_x + j;
450 
451         // Build predictior for each sub-block on current plane.
452         InterPredParams inter_pred_params;
453         av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
454                               subsampling_y, bit_depth, is_high_bitdepth,
455                               is_intrabc, scale, &ref_buf, interp_filters);
456         inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
457         av1_enc_build_one_inter_predictor(&pred[plane_offset + i * plane_w + j],
458                                           plane_w, &mv, &inter_pred_params);
459       }
460     }
461     plane_offset += plane_h * plane_w;
462   }
463 }
464 /*!\cond */
465 
466 // Computes temporal filter weights and accumulators for the frame to be
467 // filtered. More concretely, the filter weights for all pixels are the same.
468 // Inputs:
469 //   mbd: Pointer to the block for filtering, which is ONLY used to get
470 //        subsampling information of all planes as well as the bit-depth.
471 //   block_size: Size of the block.
472 //   num_planes: Number of planes in the frame.
473 //   pred: Pointer to the well-built predictors.
474 //   accum: Pointer to the pixel-wise accumulator for filtering.
475 //   count: Pointer to the pixel-wise counter fot filtering.
476 // Returns:
477 //   Nothing will be returned. But the content to which `accum` and `pred`
478 //   point will be modified.
tf_apply_temporal_filter_self(const YV12_BUFFER_CONFIG * ref_frame,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,uint32_t * accum,uint16_t * count)479 static void tf_apply_temporal_filter_self(const YV12_BUFFER_CONFIG *ref_frame,
480                                           const MACROBLOCKD *mbd,
481                                           const BLOCK_SIZE block_size,
482                                           const int mb_row, const int mb_col,
483                                           const int num_planes, uint32_t *accum,
484                                           uint16_t *count) {
485   // Block information.
486   const int mb_height = block_size_high[block_size];
487   const int mb_width = block_size_wide[block_size];
488   const int is_high_bitdepth = is_cur_buf_hbd(mbd);
489 
490   int plane_offset = 0;
491   for (int plane = 0; plane < num_planes; ++plane) {
492     const int subsampling_y = mbd->plane[plane].subsampling_y;
493     const int subsampling_x = mbd->plane[plane].subsampling_x;
494     const int h = mb_height >> subsampling_y;  // Plane height.
495     const int w = mb_width >> subsampling_x;   // Plane width.
496 
497     const int frame_stride = ref_frame->strides[plane == AOM_PLANE_Y ? 0 : 1];
498     const uint8_t *buf8 = ref_frame->buffers[plane];
499     const uint16_t *buf16 = CONVERT_TO_SHORTPTR(buf8);
500     const int frame_offset = mb_row * h * frame_stride + mb_col * w;
501 
502     int pred_idx = 0;
503     int pixel_idx = 0;
504     for (int i = 0; i < h; ++i) {
505       for (int j = 0; j < w; ++j) {
506         const int idx = plane_offset + pred_idx;  // Index with plane shift.
507         const int pred_value = is_high_bitdepth
508                                    ? buf16[frame_offset + pixel_idx]
509                                    : buf8[frame_offset + pixel_idx];
510         accum[idx] += TF_WEIGHT_SCALE * pred_value;
511         count[idx] += TF_WEIGHT_SCALE;
512         ++pred_idx;
513         ++pixel_idx;
514       }
515       pixel_idx += (frame_stride - w);
516     }
517     plane_offset += h * w;
518   }
519 }
520 
// Computes the pixel-wise squared difference between two buffers.
// Inputs:
//   ref: Pointer to reference buffer.
//   ref_offset: Start position of reference buffer for computation.
//   ref_stride: Stride for reference buffer.
//   tgt: Pointer to target buffer.
//   tgt_offset: Start position of target buffer for computation.
//   tgt_stride: Stride for target buffer.
//   height: Height of block for computation.
//   width: Width of block for computation.
//   is_high_bitdepth: Whether the two buffers point to high bit-depth frames.
//   square_diff: Pointer to save the squared differences, stored row by row
//                with a pitch of `width`.
// Returns:
//   Nothing will be returned. But the content to which `square_diff` points
//   will be modified.
static inline void compute_square_diff(const uint8_t *ref, const int ref_offset,
                                       const int ref_stride, const uint8_t *tgt,
                                       const int tgt_offset,
                                       const int tgt_stride, const int height,
                                       const int width,
                                       const int is_high_bitdepth,
                                       uint32_t *square_diff) {
  const uint16_t *const ref_hbd = CONVERT_TO_SHORTPTR(ref);
  const uint16_t *const tgt_hbd = CONVERT_TO_SHORTPTR(tgt);

  for (int row = 0; row < height; ++row) {
    const int ref_row = ref_offset + row * ref_stride;
    const int tgt_row = tgt_offset + row * tgt_stride;
    const int out_row = row * width;
    for (int col = 0; col < width; ++col) {
      uint16_t r;
      uint16_t t;
      if (is_high_bitdepth) {
        r = ref_hbd[ref_row + col];
        t = tgt_hbd[tgt_row + col];
      } else {
        r = ref[ref_row + col];
        t = tgt[tgt_row + col];
      }
      // Absolute difference first, so the square is done on an unsigned value.
      const uint32_t abs_diff = (r > t) ? (r - t) : (t - r);
      square_diff[out_row + col] = abs_diff * abs_diff;
    }
  }
}
567 
// Adds up, for every chroma position, the luma squared differences of the
// luma pixels co-located with it. The sums are consumed while filtering the
// chroma planes.
// Inputs:
//   square_diff: Pointer to squared differences from luma plane, stored with a
//                row pitch of `block_width << ss_x_shift`.
//   luma_sse_sum: Pointer to save the sum of luma squared differences. The
//                 sums are ADDED to the values already stored there.
//   block_height: Height of block for computation.
//   block_width: Width of block for computation.
//   ss_x_shift: Chroma subsampling shift in 'X' direction
//   ss_y_shift: Chroma subsampling shift in 'Y' direction
// Returns:
//   Nothing will be returned. But the content to which `luma_sse_sum` points
//   will be modified.
static void compute_luma_sq_error_sum(uint32_t *square_diff,
                                      uint32_t *luma_sse_sum, int block_height,
                                      int block_width, int ss_x_shift,
                                      int ss_y_shift) {
  const int luma_rows_per_px = 1 << ss_y_shift;
  const int luma_cols_per_px = 1 << ss_x_shift;
  const int luma_width = block_width << ss_x_shift;  // Width of Y-plane.

  for (int row = 0; row < block_height; ++row) {
    const int luma_top = row << ss_y_shift;
    for (int col = 0; col < block_width; ++col) {
      const int luma_left = col << ss_x_shift;
      uint32_t *const sum = &luma_sse_sum[row * block_width + col];
      for (int dy = 0; dy < luma_rows_per_px; ++dy) {
        const int luma_row_start = (luma_top + dy) * luma_width;
        for (int dx = 0; dx < luma_cols_per_px; ++dx) {
          *sum += square_diff[luma_row_start + luma_left + dx];
        }
      }
    }
  }
}
597 
598 /*!\endcond */
599 /*!\brief Applies temporal filtering. NOTE that there are various optimised
600  * versions of this function called where the appropriate instruction set is
601  * supported.
602  *
603  * \ingroup src_frame_proc
604  * \param[in]   frame_to_filter Pointer to the frame to be filtered, which is
605  *                              used as reference to compute squared
606  *                              difference from the predictor.
607  * \param[in]   mbd             Pointer to the block for filtering, ONLY used
608  *                              to get subsampling information for the  planes
609  * \param[in]   block_size      Size of the block
610  * \param[in]   mb_row          Row index of the block in the frame
611  * \param[in]   mb_col          Column index of the block in the frame
612  * \param[in]   num_planes      Number of planes in the frame
613  * \param[in]   noise_levels    Estimated noise levels for each plane
614  *                              in the frame (Y,U,V)
615  * \param[in]   subblock_mvs    Pointer to the motion vectors for 4 sub-blocks
616  * \param[in]   subblock_mses   Pointer to the search errors (MSE) for 4
617  *                              sub-blocks
618  * \param[in]   q_factor        Quantization factor. This is actually the `q`
619  *                              defined in libaom, converted from `qindex`
620  * \param[in]   filter_strength Filtering strength. This value lies in range
621  *                              [0, 6] where 6 is the maximum strength.
622  * \param[in]   tf_wgt_calc_lvl Controls the weight calculation method during
623  *                              temporal filtering
624  * \param[out]  pred            Pointer to the well-built predictors
625  * \param[out]  accum           Pointer to the pixel-wise accumulator for
626  *                              filtering
627  * \param[out]  count           Pointer to the pixel-wise counter for
628  *                              filtering
629  *
630  * \remark Nothing returned, But the contents of `accum`, `pred` and 'count'
631  *         will be modified
632  */
av1_apply_temporal_filter_c(const YV12_BUFFER_CONFIG * frame_to_filter,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const double * noise_levels,const MV * subblock_mvs,const int * subblock_mses,const int q_factor,const int filter_strength,int tf_wgt_calc_lvl,const uint8_t * pred,uint32_t * accum,uint16_t * count)633 void av1_apply_temporal_filter_c(
634     const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
635     const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
636     const int num_planes, const double *noise_levels, const MV *subblock_mvs,
637     const int *subblock_mses, const int q_factor, const int filter_strength,
638     int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum,
639     uint16_t *count) {
640   // Block information.
641   const int mb_height = block_size_high[block_size];
642   const int mb_width = block_size_wide[block_size];
643   const int mb_pels = mb_height * mb_width;
644   const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
645   const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
646   // Frame information.
647   const int frame_height = frame_to_filter->y_crop_height;
648   const int frame_width = frame_to_filter->y_crop_width;
649   const int min_frame_size = AOMMIN(frame_height, frame_width);
650   // Variables to simplify combined error calculation.
651   const double inv_factor = 1.0 / ((TF_WINDOW_BLOCK_BALANCE_WEIGHT + 1) *
652                                    TF_SEARCH_ERROR_NORM_WEIGHT);
653   const double weight_factor =
654       (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
655   // Decay factors for non-local mean approach.
656   double decay_factor[MAX_MB_PLANE] = { 0 };
657   // Adjust filtering based on q.
658   // Larger q -> stronger filtering -> larger weight.
659   // Smaller q -> weaker filtering -> smaller weight.
660   double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
661   q_decay = CLIP(q_decay, 1e-5, 1);
662   if (q_factor >= TF_QINDEX_CUTOFF) {
663     // Max q_factor is 255, therefore the upper bound of q_decay is 8.
664     // We do not need a clip here.
665     q_decay = 0.5 * pow((double)q_factor / 64, 2);
666   }
667   // Smaller strength -> smaller filtering weight.
668   double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
669   s_decay = CLIP(s_decay, 1e-5, 1);
670   for (int plane = 0; plane < num_planes; plane++) {
671     // Larger noise -> larger filtering weight.
672     const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
673     decay_factor[plane] = 1 / (n_decay * q_decay * s_decay);
674   }
675   double d_factor[4] = { 0 };
676   for (int subblock_idx = 0; subblock_idx < 4; subblock_idx++) {
677     // Larger motion vector -> smaller filtering weight.
678     const MV mv = subblock_mvs[subblock_idx];
679     const double distance = sqrt(pow(mv.row, 2) + pow(mv.col, 2));
680     double distance_threshold = min_frame_size * TF_SEARCH_DISTANCE_THRESHOLD;
681     distance_threshold = AOMMAX(distance_threshold, 1);
682     d_factor[subblock_idx] = distance / distance_threshold;
683     d_factor[subblock_idx] = AOMMAX(d_factor[subblock_idx], 1);
684   }
685 
686   // Allocate memory for pixel-wise squared differences. They,
687   // regardless of the subsampling, are assigned with memory of size `mb_pels`.
688   uint32_t *square_diff = aom_memalign(16, mb_pels * sizeof(uint32_t));
689   if (!square_diff) {
690     aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
691                        "Error allocating temporal filter data");
692   }
693   memset(square_diff, 0, mb_pels * sizeof(square_diff[0]));
694 
695   // Allocate memory for accumulated luma squared error. This value will be
696   // consumed while filtering the chroma planes.
697   uint32_t *luma_sse_sum = aom_memalign(32, mb_pels * sizeof(uint32_t));
698   if (!luma_sse_sum) {
699     aom_free(square_diff);
700     aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
701                        "Error allocating temporal filter data");
702   }
703   memset(luma_sse_sum, 0, mb_pels * sizeof(luma_sse_sum[0]));
704 
705   // Get window size for pixel-wise filtering.
706   assert(TF_WINDOW_LENGTH % 2 == 1);
707   const int half_window = TF_WINDOW_LENGTH >> 1;
708 
709   // Handle planes in sequence.
710   int plane_offset = 0;
711   for (int plane = 0; plane < num_planes; ++plane) {
712     // Locate pixel on reference frame.
713     const int subsampling_y = mbd->plane[plane].subsampling_y;
714     const int subsampling_x = mbd->plane[plane].subsampling_x;
715     const int h = mb_height >> subsampling_y;  // Plane height.
716     const int w = mb_width >> subsampling_x;   // Plane width.
717     const int frame_stride =
718         frame_to_filter->strides[plane == AOM_PLANE_Y ? 0 : 1];
719     const int frame_offset = mb_row * h * frame_stride + mb_col * w;
720     const uint8_t *ref = frame_to_filter->buffers[plane];
721     const int ss_y_shift =
722         subsampling_y - mbd->plane[AOM_PLANE_Y].subsampling_y;
723     const int ss_x_shift =
724         subsampling_x - mbd->plane[AOM_PLANE_Y].subsampling_x;
725     const int num_ref_pixels = TF_WINDOW_LENGTH * TF_WINDOW_LENGTH +
726                                ((plane) ? (1 << (ss_x_shift + ss_y_shift)) : 0);
727     const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
728 
729     // Filter U-plane and V-plane using Y-plane. This is because motion
730     // search is only done on Y-plane, so the information from Y-plane will
731     // be more accurate. The luma sse sum is reused in both chroma planes.
732     if (plane == AOM_PLANE_U)
733       compute_luma_sq_error_sum(square_diff, luma_sse_sum, h, w, ss_x_shift,
734                                 ss_y_shift);
735     compute_square_diff(ref, frame_offset, frame_stride, pred, plane_offset, w,
736                         h, w, is_high_bitdepth, square_diff);
737 
738     // Perform filtering.
739     int pred_idx = 0;
740     for (int i = 0; i < h; ++i) {
741       for (int j = 0; j < w; ++j) {
742         // non-local mean approach
743         uint64_t sum_square_diff = 0;
744 
745         for (int wi = -half_window; wi <= half_window; ++wi) {
746           for (int wj = -half_window; wj <= half_window; ++wj) {
747             const int y = CLIP(i + wi, 0, h - 1);  // Y-coord on current plane.
748             const int x = CLIP(j + wj, 0, w - 1);  // X-coord on current plane.
749             sum_square_diff += square_diff[y * w + x];
750           }
751         }
752 
753         sum_square_diff += luma_sse_sum[i * w + j];
754 
755         // Scale down the difference for high bit depth input.
756         if (mbd->bd > 8) sum_square_diff >>= ((mbd->bd - 8) * 2);
757 
758         // Combine window error and block error, and normalize it.
759         const double window_error = sum_square_diff * inv_num_ref_pixels;
760         const int subblock_idx = (i >= h / 2) * 2 + (j >= w / 2);
761         const double block_error = (double)subblock_mses[subblock_idx];
762         const double combined_error =
763             weight_factor * window_error + block_error * inv_factor;
764 
765         // Compute filter weight.
766         double scaled_error =
767             combined_error * d_factor[subblock_idx] * decay_factor[plane];
768         scaled_error = AOMMIN(scaled_error, 7);
769         int weight;
770         if (tf_wgt_calc_lvl == 0) {
771           weight = (int)(exp(-scaled_error) * TF_WEIGHT_SCALE);
772         } else {
773           const float fweight =
774               approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
775           weight = iroundpf(fweight);
776         }
777 
778         const int idx = plane_offset + pred_idx;  // Index with plane shift.
779         const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
780         accum[idx] += weight * pred_value;
781         count[idx] += weight;
782 
783         ++pred_idx;
784       }
785     }
786     plane_offset += h * w;
787   }
788 
789   aom_free(square_diff);
790   aom_free(luma_sse_sum);
791 }
792 #if CONFIG_AV1_HIGHBITDEPTH
793 // Calls High bit-depth temporal filter
av1_highbd_apply_temporal_filter_c(const YV12_BUFFER_CONFIG * frame_to_filter,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const double * noise_levels,const MV * subblock_mvs,const int * subblock_mses,const int q_factor,const int filter_strength,int tf_wgt_calc_lvl,const uint8_t * pred,uint32_t * accum,uint16_t * count)794 void av1_highbd_apply_temporal_filter_c(
795     const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
796     const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
797     const int num_planes, const double *noise_levels, const MV *subblock_mvs,
798     const int *subblock_mses, const int q_factor, const int filter_strength,
799     int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum,
800     uint16_t *count) {
801   av1_apply_temporal_filter_c(frame_to_filter, mbd, block_size, mb_row, mb_col,
802                               num_planes, noise_levels, subblock_mvs,
803                               subblock_mses, q_factor, filter_strength,
804                               tf_wgt_calc_lvl, pred, accum, count);
805 }
806 #endif  // CONFIG_AV1_HIGHBITDEPTH
807 /*!\brief Normalizes the accumulated filtering result to produce the filtered
808  *        frame
809  *
810  * \ingroup src_frame_proc
811  * \param[in]   mbd            Pointer to the block for filtering, which is
812  *                             ONLY used to get subsampling information for
813  *                             all the planes
814  * \param[in]   block_size     Size of the block
815  * \param[in]   mb_row         Row index of the block in the frame
816  * \param[in]   mb_col         Column index of the block in the frame
817  * \param[in]   num_planes     Number of planes in the frame
818  * \param[in]   accum          Pointer to the pre-computed accumulator
819  * \param[in]   count          Pointer to the pre-computed count
820  * \param[out]  result_buffer  Pointer to result buffer
821  *
822  * \remark Nothing returned, but the content to which `result_buffer` pointer
823  *         will be modified
824  */
tf_normalize_filtered_frame(const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const uint32_t * accum,const uint16_t * count,YV12_BUFFER_CONFIG * result_buffer)825 static void tf_normalize_filtered_frame(
826     const MACROBLOCKD *mbd, const BLOCK_SIZE block_size, const int mb_row,
827     const int mb_col, const int num_planes, const uint32_t *accum,
828     const uint16_t *count, YV12_BUFFER_CONFIG *result_buffer) {
829   // Block information.
830   const int mb_height = block_size_high[block_size];
831   const int mb_width = block_size_wide[block_size];
832   const int is_high_bitdepth = is_frame_high_bitdepth(result_buffer);
833 
834   int plane_offset = 0;
835   for (int plane = 0; plane < num_planes; ++plane) {
836     const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
837     const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
838     const int frame_stride = result_buffer->strides[plane == 0 ? 0 : 1];
839     const int frame_offset = mb_row * plane_h * frame_stride + mb_col * plane_w;
840     uint8_t *const buf = result_buffer->buffers[plane];
841     uint16_t *const buf16 = CONVERT_TO_SHORTPTR(buf);
842 
843     int plane_idx = 0;             // Pixel index on current plane (block-base).
844     int frame_idx = frame_offset;  // Pixel index on the entire frame.
845     for (int i = 0; i < plane_h; ++i) {
846       for (int j = 0; j < plane_w; ++j) {
847         const int idx = plane_idx + plane_offset;
848         const uint16_t rounding = count[idx] >> 1;
849         if (is_high_bitdepth) {
850           buf16[frame_idx] =
851               (uint16_t)OD_DIVU(accum[idx] + rounding, count[idx]);
852         } else {
853           buf[frame_idx] = (uint8_t)OD_DIVU(accum[idx] + rounding, count[idx]);
854         }
855         ++plane_idx;
856         ++frame_idx;
857       }
858       frame_idx += (frame_stride - plane_w);
859     }
860     plane_offset += plane_h * plane_w;
861   }
862 }
863 
// Temporally filters one row of blocks (`mb_row`) of the frame selected in
// `cpi->tf_ctx`. For every block in the row, each available frame is blended
// into the `accum`/`count` buffers of `td->tf_data`, the result is normalized
// into `tf_ctx->output_frame`, and, when `tf_ctx->compute_frame_diff` is set,
// the luma difference between source and output is added to
// `td->tf_data.diff`.
void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
  // Filtering context.
  TemporalFilterCtx *const ctx = &cpi->tf_ctx;
  YV12_BUFFER_CONFIG **const frame_list = ctx->frames;
  const int frame_count = ctx->num_frames;
  const int center_idx = ctx->filter_frame_idx;
  const int want_frame_diff = ctx->compute_frame_diff;
  const struct scale_factors *const scale = &ctx->sf;
  const double *const noise_levels = ctx->noise_levels;
  const int num_pels = ctx->num_pels;
  const int q_factor = ctx->q_factor;
  const YV12_BUFFER_CONFIG *const frame_to_filter = frame_list[center_idx];

  // Block geometry.
  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mi_h = mi_size_high_log2[block_size];
  const int mi_w = mi_size_wide_log2[block_size];

  // Per-thread working state.
  MACROBLOCK *const mb = &td->mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  TemporalFilterData *const tf_data = &td->tf_data;
  uint32_t *const accum = tf_data->accum;
  uint16_t *const count = tf_data->count;
  uint8_t *const pred = tf_data->pred;
  FRAME_DIFF *const diff = &tf_data->diff;

  const int num_planes = av1_num_planes(&cpi->common);
  const int weight_calc_level_in_tf = cpi->sf.hl_sf.weight_calc_level_in_tf;
  const FRAME_TYPE frame_type =
      cpi->ppi->gf_group.frame_type[cpi->gf_frame_index];

  // Factor to control the filtering strength. Once lowered below it stays
  // lowered for the remaining blocks of this row.
  int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;

  av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                        (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
                        cpi->oxcf.border_in_pixels);

  for (int mb_col = 0; mb_col < ctx->mb_cols; ++mb_col) {
    av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);
    memset(accum, 0, num_pels * sizeof(accum[0]));
    memset(count, 0, num_pels * sizeof(count[0]));

    // Reference motion vector passed down along frames.
    MV ref_mv = kZeroMv;

    // Decide whether to perform motion search at 16x16 sub-block level or not
    // based on 4x4 sub-blocks source variance. Allow motion search for split
    // partition only if the difference between max and min source variance of
    // 4x4 blocks is greater than a threshold (which is derived empirically).
    bool allow_me_for_sub_blks = true;
    if (cpi->sf.hl_sf.allow_sub_blk_me_in_tf) {
      // Start with the largest possible minimum and a maximum of 0.
      double var_lo = DBL_MAX;
      double var_hi = 0;
      get_log_var_4x4sub_blk(cpi, frame_to_filter, mb_row, mb_col,
                             TF_BLOCK_SIZE, &var_lo, &var_hi,
                             is_frame_high_bitdepth(frame_to_filter));
      // TODO(sanampudi.venkatarao@ittiam.com): Experiment and adjust the
      // threshold for high bit depth.
      if ((var_hi - var_lo) <= 4.0) allow_me_for_sub_blks = false;
    }

    // Perform temporal filtering frame by frame.
    for (int f = 0; f < frame_count; ++f) {
      YV12_BUFFER_CONFIG *const cur_frame = frame_list[f];
      if (cur_frame == NULL) continue;

      if (f == center_idx) {
        // Frame to be filtered: change ref_mv sign for following frames and
        // blend the frame with itself. No motion search is needed.
        ref_mv.row *= -1;
        ref_mv.col *= -1;
        tf_apply_temporal_filter_self(cur_frame, mbd, block_size, mb_row,
                                      mb_col, num_planes, accum, count);
        continue;
      }

      // Other reference frames: motion search first.
      MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
      int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
      int is_dc_diff_large = 0;
      tf_motion_search(cpi, mb, frame_to_filter, cur_frame, block_size, mb_row,
                       mb_col, &ref_mv, allow_me_for_sub_blks, subblock_mvs,
                       subblock_mses, &is_dc_diff_large);

      if (is_dc_diff_large && frame_type == KEY_FRAME &&
          cpi->oxcf.kf_cfg.enable_keyframe_filtering == 1) {
        filter_strength = AOMMIN(filter_strength, 1);
      }

      // Perform weighted averaging against the motion-compensated predictor.
      tf_build_predictor(cur_frame, mbd, block_size, mb_row, mb_col,
                         num_planes, scale, subblock_mvs, pred);

      // TODO(any): avx2/sse2 version should be changed to align with C
      // function before using. In particular, current avx2/sse2 function
      // only supports 32x32 block size and 5x5 filtering window.
      const int dispatch_ok =
          (TF_BLOCK_SIZE == BLOCK_32X32) && (TF_WINDOW_LENGTH == 5);
      if (!dispatch_ok) {
        av1_apply_temporal_filter_c(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
      } else if (is_frame_high_bitdepth(frame_to_filter)) {
        // for high bit-depth
#if CONFIG_AV1_HIGHBITDEPTH
        av1_highbd_apply_temporal_filter(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
#else
        av1_apply_temporal_filter_c(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
#endif  // CONFIG_AV1_HIGHBITDEPTH
      } else {
        // for 8-bit
        av1_apply_temporal_filter(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
      }
    }

    tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
                                accum, count, ctx->output_frame);

    if (!want_frame_diff) continue;

    // Accumulate the luma difference between the source and filtered block.
    const int y_height = mb_height >> mbd->plane[0].subsampling_y;
    const int y_width = mb_width >> mbd->plane[0].subsampling_x;
    const int source_y_stride = frame_to_filter->y_stride;
    const int filter_y_stride = ctx->output_frame->y_stride;
    const uint8_t *const source_blk =
        frame_to_filter->y_buffer +
        (mb_row * y_height * source_y_stride + mb_col * y_width);
    const uint8_t *const filter_blk =
        ctx->output_frame->y_buffer +
        (mb_row * y_height * filter_y_stride + mb_col * y_width);
    unsigned int sse = 0;
    // Only the `sse` output is consumed; the returned value is ignored.
    cpi->ppi->fn_ptr[block_size].vf(source_blk, source_y_stride, filter_blk,
                                    filter_y_stride, &sse);
    diff->sum += sse;
    diff->sse += sse * (int64_t)sse;
  }
}
1018 
1019 /*!\brief Does temporal filter for a given frame.
1020  *
1021  * \ingroup src_frame_proc
1022  * \param[in]   cpi                   Top level encoder instance structure
1023  *
1024  * \remark Nothing will be returned, but the contents of td->diff will be
1025  modified.
1026  */
tf_do_filtering(AV1_COMP * cpi)1027 static void tf_do_filtering(AV1_COMP *cpi) {
1028   // Basic information.
1029   ThreadData *td = &cpi->td;
1030   TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1031   const struct scale_factors *scale = &tf_ctx->sf;
1032   const int num_planes = av1_num_planes(&cpi->common);
1033   assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
1034 
1035   MACROBLOCKD *mbd = &td->mb.e_mbd;
1036   uint8_t *input_buffer[MAX_MB_PLANE];
1037   MB_MODE_INFO **input_mb_mode_info;
1038   tf_save_state(mbd, &input_mb_mode_info, input_buffer, num_planes);
1039   tf_setup_macroblockd(mbd, &td->tf_data, scale);
1040 
1041   // Perform temporal filtering for each row.
1042   for (int mb_row = 0; mb_row < tf_ctx->mb_rows; mb_row++)
1043     av1_tf_do_filtering_row(cpi, td, mb_row);
1044 
1045   tf_restore_state(mbd, input_mb_mode_info, input_buffer, num_planes);
1046 }
1047 
1048 /*!\brief Setups the frame buffer for temporal filtering. This fuction
1049  * determines how many frames will be used for temporal filtering and then
1050  * groups them into a buffer. This function will also estimate the noise level
1051  * of the to-filter frame.
1052  *
1053  * \ingroup src_frame_proc
1054  * \param[in]   cpi             Top level encoder instance structure
1055  * \param[in]   filter_frame_lookahead_idx  The index of the to-filter frame
1056  *                              in the lookahead buffer cpi->lookahead
1057  * \param[in]   gf_frame_index  GOP index
1058  *
1059  * \remark Nothing will be returned. But the fields `frames`, `num_frames`,
1060  *         `filter_frame_idx` and `noise_levels` will be updated in cpi->tf_ctx.
1061  */
tf_setup_filtering_buffer(AV1_COMP * cpi,int filter_frame_lookahead_idx,int gf_frame_index)1062 static void tf_setup_filtering_buffer(AV1_COMP *cpi,
1063                                       int filter_frame_lookahead_idx,
1064                                       int gf_frame_index) {
1065   const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1066   const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_frame_index];
1067   const FRAME_TYPE frame_type = gf_group->frame_type[gf_frame_index];
1068   const int is_forward_keyframe =
1069       av1_gop_check_forward_keyframe(gf_group, gf_frame_index);
1070 
1071   TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1072   YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
1073   // Number of frames used for filtering. Set `arnr_max_frames` as 1 to disable
1074   // temporal filtering.
1075   int num_frames = AOMMAX(cpi->oxcf.algo_cfg.arnr_max_frames, 1);
1076   int num_before = 0;  // Number of filtering frames before the to-filter frame.
1077   int num_after = 0;   // Number of filtering frames after the to-filer frame.
1078   const int lookahead_depth =
1079       av1_lookahead_depth(cpi->ppi->lookahead, cpi->compressor_stage);
1080 
1081   // Temporal filtering should not go beyond key frames
1082   const int key_to_curframe =
1083       AOMMAX(cpi->rc.frames_since_key + filter_frame_lookahead_idx, 0);
1084   const int curframe_to_key =
1085       AOMMAX(cpi->rc.frames_to_key - filter_frame_lookahead_idx - 1, 0);
1086 
1087   // Number of buffered frames before the to-filter frame.
1088   int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
1089 
1090   // Number of buffered frames after the to-filter frame.
1091   int max_after =
1092       AOMMIN(lookahead_depth - filter_frame_lookahead_idx - 1, curframe_to_key);
1093 
1094   // Estimate noises for each plane.
1095   const struct lookahead_entry *to_filter_buf = av1_lookahead_peek(
1096       cpi->ppi->lookahead, filter_frame_lookahead_idx, cpi->compressor_stage);
1097   assert(to_filter_buf != NULL);
1098   const YV12_BUFFER_CONFIG *to_filter_frame = &to_filter_buf->img;
1099   const int num_planes = av1_num_planes(&cpi->common);
1100   double *noise_levels = tf_ctx->noise_levels;
1101   av1_estimate_noise_level(to_filter_frame, noise_levels, AOM_PLANE_Y,
1102                            num_planes - 1, cpi->common.seq_params->bit_depth,
1103                            NOISE_ESTIMATION_EDGE_THRESHOLD);
1104   // Get quantization factor.
1105   const int q = get_q(cpi);
1106   // Get correlation estimates from first-pass;
1107   const FIRSTPASS_STATS *stats =
1108       cpi->twopass_frame.stats_in - (cpi->rc.frames_since_key == 0);
1109   double accu_coeff0 = 1.0, accu_coeff1 = 1.0;
1110   for (int i = 1; i <= max_after; i++) {
1111     if (stats + filter_frame_lookahead_idx + i >=
1112         cpi->ppi->twopass.stats_buf_ctx->stats_in_end) {
1113       max_after = i - 1;
1114       break;
1115     }
1116     accu_coeff1 *=
1117         AOMMAX(stats[filter_frame_lookahead_idx + i].cor_coeff, 0.001);
1118   }
1119   if (max_after >= 1) {
1120     accu_coeff1 = pow(accu_coeff1, 1.0 / (double)max_after);
1121   }
1122   for (int i = 1; i <= max_before; i++) {
1123     if (stats + filter_frame_lookahead_idx - i + 1 <=
1124         cpi->ppi->twopass.stats_buf_ctx->stats_in_start) {
1125       max_before = i - 1;
1126       break;
1127     }
1128     accu_coeff0 *=
1129         AOMMAX(stats[filter_frame_lookahead_idx - i + 1].cor_coeff, 0.001);
1130   }
1131   if (max_before >= 1) {
1132     accu_coeff0 = pow(accu_coeff0, 1.0 / (double)max_before);
1133   }
1134 
1135   // Adjust number of filtering frames based on quantization factor. When the
1136   // quantization factor is small enough (lossless compression), we will not
1137   // change the number of frames for key frame filtering, which is to avoid
1138   // visual quality drop.
1139   int adjust_num = 6;
1140   const int adjust_num_frames_for_arf_filtering =
1141       cpi->sf.hl_sf.adjust_num_frames_for_arf_filtering;
1142   if (num_frames == 1) {  // `arnr_max_frames = 1` is used to disable filtering.
1143     adjust_num = 0;
1144   } else if ((update_type == KF_UPDATE) && q <= 10) {
1145     adjust_num = 0;
1146   } else if (adjust_num_frames_for_arf_filtering > 0 &&
1147              update_type != KF_UPDATE && (cpi->rc.frames_since_key > 0)) {
1148     // Since screen content detection happens after temporal filtering,
1149     // 'frames_since_key' check is added to ensure the sf is disabled for the
1150     // first alt-ref frame.
1151     // Adjust number of frames to be considered for filtering based on noise
1152     // level of the current frame. For low-noise frame, use more frames to
1153     // filter such that the filtered frame can provide better predictions for
1154     // subsequent frames and vice versa.
1155     const uint8_t av1_adjust_num_using_noise_lvl[2][3] = { { 6, 4, 2 },
1156                                                            { 4, 2, 0 } };
1157     const uint8_t *adjust_num_frames =
1158         av1_adjust_num_using_noise_lvl[adjust_num_frames_for_arf_filtering - 1];
1159 
1160     if (noise_levels[AOM_PLANE_Y] < 0.5)
1161       adjust_num = adjust_num_frames[0];
1162     else if (noise_levels[AOM_PLANE_Y] < 1.0)
1163       adjust_num = adjust_num_frames[1];
1164     else
1165       adjust_num = adjust_num_frames[2];
1166   }
1167   num_frames = AOMMIN(num_frames + adjust_num, lookahead_depth);
1168 
1169   if (frame_type == KEY_FRAME) {
1170     num_before = AOMMIN(is_forward_keyframe ? num_frames / 2 : 0, max_before);
1171     num_after = AOMMIN(num_frames - 1, max_after);
1172   } else {
1173     int gfu_boost = av1_calc_arf_boost(&cpi->ppi->twopass, &cpi->twopass_frame,
1174                                        &cpi->ppi->p_rc, &cpi->frame_info,
1175                                        filter_frame_lookahead_idx, max_before,
1176                                        max_after, NULL, NULL, 0);
1177 
1178     num_frames = AOMMIN(num_frames, gfu_boost / 150);
1179     num_frames += !(num_frames & 1);  // Make the number odd.
1180 
1181     // Only use 2 neighbours for the second ARF.
1182     if (update_type == INTNL_ARF_UPDATE) num_frames = AOMMIN(num_frames, 3);
1183     if (AOMMIN(max_after, max_before) >= num_frames / 2) {
1184       // just use half half
1185       num_before = num_frames / 2;
1186       num_after = num_frames / 2;
1187     } else {
1188       if (max_after < num_frames / 2) {
1189         num_after = max_after;
1190         num_before = AOMMIN(num_frames - 1 - num_after, max_before);
1191       } else {
1192         num_before = max_before;
1193         num_after = AOMMIN(num_frames - 1 - num_before, max_after);
1194       }
1195       // Adjust insymmetry based on frame-level correlation
1196       if (max_after > 0 && max_before > 0) {
1197         if (num_after < num_before) {
1198           const int insym = (int)(0.4 / AOMMAX(1 - accu_coeff1, 0.01));
1199           num_before = AOMMIN(num_before, num_after + insym);
1200         } else {
1201           const int insym = (int)(0.4 / AOMMAX(1 - accu_coeff0, 0.01));
1202           num_after = AOMMIN(num_after, num_before + insym);
1203         }
1204       }
1205     }
1206   }
1207   num_frames = num_before + 1 + num_after;
1208 
1209   // Setup the frame buffer.
1210   for (int frame = 0; frame < num_frames; ++frame) {
1211     const int lookahead_idx = frame - num_before + filter_frame_lookahead_idx;
1212     struct lookahead_entry *buf = av1_lookahead_peek(
1213         cpi->ppi->lookahead, lookahead_idx, cpi->compressor_stage);
1214     assert(buf != NULL);
1215     frames[frame] = &buf->img;
1216   }
1217   tf_ctx->num_frames = num_frames;
1218   tf_ctx->filter_frame_idx = num_before;
1219   assert(frames[tf_ctx->filter_frame_idx] == to_filter_frame);
1220 
1221   av1_setup_src_planes(&cpi->td.mb, &to_filter_buf->img, 0, 0, num_planes,
1222                        cpi->common.seq_params->sb_size);
1223   av1_setup_block_planes(&cpi->td.mb.e_mbd,
1224                          cpi->common.seq_params->subsampling_x,
1225                          cpi->common.seq_params->subsampling_y, num_planes);
1226 }
1227 
1228 /*!\cond */
1229 
av1_estimate_noise_from_single_plane_c(const uint8_t * src,int height,int width,int stride,int edge_thresh)1230 double av1_estimate_noise_from_single_plane_c(const uint8_t *src, int height,
1231                                               int width, int stride,
1232                                               int edge_thresh) {
1233   int64_t accum = 0;
1234   int count = 0;
1235 
1236   for (int i = 1; i < height - 1; ++i) {
1237     for (int j = 1; j < width - 1; ++j) {
1238       // Setup a small 3x3 matrix.
1239       const int center_idx = i * stride + j;
1240       int mat[3][3];
1241       for (int ii = -1; ii <= 1; ++ii) {
1242         for (int jj = -1; jj <= 1; ++jj) {
1243           const int idx = center_idx + ii * stride + jj;
1244           mat[ii + 1][jj + 1] = src[idx];
1245         }
1246       }
1247       // Compute sobel gradients.
1248       const int Gx = (mat[0][0] - mat[0][2]) + (mat[2][0] - mat[2][2]) +
1249                      2 * (mat[1][0] - mat[1][2]);
1250       const int Gy = (mat[0][0] - mat[2][0]) + (mat[0][2] - mat[2][2]) +
1251                      2 * (mat[0][1] - mat[2][1]);
1252       const int Ga = ROUND_POWER_OF_TWO(abs(Gx) + abs(Gy), 0);
1253       // Accumulate Laplacian.
1254       if (Ga < edge_thresh) {  // Only count smooth pixels.
1255         const int v = 4 * mat[1][1] -
1256                       2 * (mat[0][1] + mat[2][1] + mat[1][0] + mat[1][2]) +
1257                       (mat[0][0] + mat[0][2] + mat[2][0] + mat[2][2]);
1258         accum += ROUND_POWER_OF_TWO(abs(v), 0);
1259         ++count;
1260       }
1261     }
1262   }
1263 
1264   // Return -1.0 (unreliable estimation) if there are too few smooth pixels.
1265   return (count < 16) ? -1.0 : (double)accum / (6 * count) * SQRT_PI_BY_2;
1266 }
1267 
1268 #if CONFIG_AV1_HIGHBITDEPTH
av1_highbd_estimate_noise_from_single_plane_c(const uint16_t * src16,int height,int width,const int stride,int bit_depth,int edge_thresh)1269 double av1_highbd_estimate_noise_from_single_plane_c(const uint16_t *src16,
1270                                                      int height, int width,
1271                                                      const int stride,
1272                                                      int bit_depth,
1273                                                      int edge_thresh) {
1274   int64_t accum = 0;
1275   int count = 0;
1276   for (int i = 1; i < height - 1; ++i) {
1277     for (int j = 1; j < width - 1; ++j) {
1278       // Setup a small 3x3 matrix.
1279       const int center_idx = i * stride + j;
1280       int mat[3][3];
1281       for (int ii = -1; ii <= 1; ++ii) {
1282         for (int jj = -1; jj <= 1; ++jj) {
1283           const int idx = center_idx + ii * stride + jj;
1284           mat[ii + 1][jj + 1] = src16[idx];
1285         }
1286       }
1287       // Compute sobel gradients.
1288       const int Gx = (mat[0][0] - mat[0][2]) + (mat[2][0] - mat[2][2]) +
1289                      2 * (mat[1][0] - mat[1][2]);
1290       const int Gy = (mat[0][0] - mat[2][0]) + (mat[0][2] - mat[2][2]) +
1291                      2 * (mat[0][1] - mat[2][1]);
1292       const int Ga = ROUND_POWER_OF_TWO(abs(Gx) + abs(Gy), bit_depth - 8);
1293       // Accumulate Laplacian.
1294       if (Ga < edge_thresh) {  // Only count smooth pixels.
1295         const int v = 4 * mat[1][1] -
1296                       2 * (mat[0][1] + mat[2][1] + mat[1][0] + mat[1][2]) +
1297                       (mat[0][0] + mat[0][2] + mat[2][0] + mat[2][2]);
1298         accum += ROUND_POWER_OF_TWO(abs(v), bit_depth - 8);
1299         ++count;
1300       }
1301     }
1302   }
1303 
1304   // Return -1.0 (unreliable estimation) if there are too few smooth pixels.
1305   return (count < 16) ? -1.0 : (double)accum / (6 * count) * SQRT_PI_BY_2;
1306 }
1307 #endif
1308 
av1_estimate_noise_level(const YV12_BUFFER_CONFIG * frame,double * noise_level,int plane_from,int plane_to,int bit_depth,int edge_thresh)1309 void av1_estimate_noise_level(const YV12_BUFFER_CONFIG *frame,
1310                               double *noise_level, int plane_from, int plane_to,
1311                               int bit_depth, int edge_thresh) {
1312   for (int plane = plane_from; plane <= plane_to; plane++) {
1313     const bool is_uv_plane = (plane != AOM_PLANE_Y);
1314     const int height = frame->crop_heights[is_uv_plane];
1315     const int width = frame->crop_widths[is_uv_plane];
1316     const int stride = frame->strides[is_uv_plane];
1317     const uint8_t *src = frame->buffers[plane];
1318 
1319 #if CONFIG_AV1_HIGHBITDEPTH
1320     const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1321     const int is_high_bitdepth = is_frame_high_bitdepth(frame);
1322     if (is_high_bitdepth) {
1323       noise_level[plane] = av1_highbd_estimate_noise_from_single_plane(
1324           src16, height, width, stride, bit_depth, edge_thresh);
1325     } else {
1326       noise_level[plane] = av1_estimate_noise_from_single_plane(
1327           src, height, width, stride, edge_thresh);
1328     }
1329 #else
1330     (void)bit_depth;
1331     noise_level[plane] = av1_estimate_noise_from_single_plane(
1332         src, height, width, stride, edge_thresh);
1333 #endif
1334   }
1335 }
1336 
1337 // Initializes the members of TemporalFilterCtx
1338 // Inputs:
1339 //   cpi: Top level encoder instance structure
1340 //   check_show_existing: If 1, check whether the filtered frame is similar
1341 //                        to the original frame.
1342 //   filter_frame_lookahead_idx: The index of the frame to be filtered in the
1343 //                               lookahead buffer cpi->lookahead.
1344 // Returns:
1345 //   Nothing will be returned. But the contents of cpi->tf_ctx will be modified.
init_tf_ctx(AV1_COMP * cpi,int filter_frame_lookahead_idx,int gf_frame_index,int compute_frame_diff,YV12_BUFFER_CONFIG * output_frame)1346 static void init_tf_ctx(AV1_COMP *cpi, int filter_frame_lookahead_idx,
1347                         int gf_frame_index, int compute_frame_diff,
1348                         YV12_BUFFER_CONFIG *output_frame) {
1349   TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1350   // Setup frame buffer for filtering.
1351   YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
1352   tf_ctx->num_frames = 0;
1353   tf_ctx->filter_frame_idx = -1;
1354   tf_ctx->output_frame = output_frame;
1355   tf_ctx->compute_frame_diff = compute_frame_diff;
1356   tf_setup_filtering_buffer(cpi, filter_frame_lookahead_idx, gf_frame_index);
1357   assert(tf_ctx->num_frames > 0);
1358   assert(tf_ctx->filter_frame_idx < tf_ctx->num_frames);
1359 
1360   // Setup scaling factors. Scaling on each of the arnr frames is not
1361   // supported.
1362   // ARF is produced at the native frame size and resized when coded.
1363   struct scale_factors *sf = &tf_ctx->sf;
1364   av1_setup_scale_factors_for_frame(
1365       sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
1366       frames[0]->y_crop_width, frames[0]->y_crop_height);
1367 
1368   // Initialize temporal filter parameters.
1369   MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
1370   const int filter_frame_idx = tf_ctx->filter_frame_idx;
1371   const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
1372   const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
1373   const int frame_height = frame_to_filter->y_crop_height;
1374   const int frame_width = frame_to_filter->y_crop_width;
1375   const int mb_width = block_size_wide[block_size];
1376   const int mb_height = block_size_high[block_size];
1377   const int mb_rows = get_num_blocks(frame_height, mb_height);
1378   const int mb_cols = get_num_blocks(frame_width, mb_width);
1379   const int mb_pels = mb_width * mb_height;
1380   const int is_highbitdepth = is_frame_high_bitdepth(frame_to_filter);
1381   const int num_planes = av1_num_planes(&cpi->common);
1382   int num_pels = 0;
1383   for (int i = 0; i < num_planes; i++) {
1384     const int subsampling_x = mbd->plane[i].subsampling_x;
1385     const int subsampling_y = mbd->plane[i].subsampling_y;
1386     num_pels += mb_pels >> (subsampling_x + subsampling_y);
1387   }
1388   tf_ctx->num_pels = num_pels;
1389   tf_ctx->mb_rows = mb_rows;
1390   tf_ctx->mb_cols = mb_cols;
1391   tf_ctx->is_highbitdepth = is_highbitdepth;
1392   tf_ctx->q_factor = get_q(cpi);
1393 }
1394 
// Decides whether the filtered frame is close enough to the source frame,
// based on the statistics accumulated in `frame_diff`.
// Inputs:
//   frame: Frame whose luma crop dimensions determine the number of
//          TF_BLOCK_SIZE blocks used to normalize the statistics.
//   frame_diff: Accumulated `sum` and `sse` of the frame difference
//               (presumably one contribution per block -- confirm against
//               the code that fills FRAME_DIFF).
//   q_index: Quantizer index used to derive the AC quantizer step.
//   bit_depth: Bit depth used to derive the AC quantizer step.
// Returns:
//   1 if the mean is below 0.7 * ac_q_step^2 and the standard deviation is
//   below 1.2 * mean; 0 otherwise.
int av1_check_show_filtered_frame(const YV12_BUFFER_CONFIG *frame,
                                  const FRAME_DIFF *frame_diff, int q_index,
                                  aom_bit_depth_t bit_depth) {
  const int frame_height = frame->y_crop_height;
  const int frame_width = frame->y_crop_width;
  const int block_height = block_size_high[TF_BLOCK_SIZE];
  const int block_width = block_size_wide[TF_BLOCK_SIZE];
  const int mb_rows = get_num_blocks(frame_height, block_height);
  const int mb_cols = get_num_blocks(frame_width, block_width);
  // Guard against a zero block count so the divisions below are well defined.
  const int num_mbs = AOMMAX(1, mb_rows * mb_cols);
  const float mean = (float)frame_diff->sum / num_mbs;
  // E[x^2] - E[x]^2 is mathematically non-negative, but in single precision
  // it can round to a tiny negative value when the per-block values are
  // nearly uniform. sqrt() of a negative value is NaN, and every comparison
  // with NaN is false, which would reject exactly the low-variance case this
  // check is meant to accept. Clamp the variance at zero.
  const float raw_variance = (float)frame_diff->sse / num_mbs - mean * mean;
  const float variance = raw_variance > 0.0f ? raw_variance : 0.0f;
  const float std = (float)sqrt(variance);

  const int ac_q_step = av1_ac_quant_QTX(q_index, 0, bit_depth);
  const float threshold = 0.7f * ac_q_step * ac_q_step;

  if (mean < threshold && std < mean * 1.2) {
    return 1;
  }
  return 0;
}
1416 
// Applies temporal filtering to one frame of the lookahead buffer.
// Inputs:
//   cpi: Top level encoder instance structure.
//   filter_frame_lookahead_idx: Lookahead index of the frame to be filtered.
//   gf_frame_index: Index of that frame within the GF group; it must refer to
//                   a frame with frame_parallel_level 0.
//   frame_diff: If non-NULL, receives a copy of the FRAME_DIFF held in
//               cpi->td.tf_data once filtering has finished.
//   output_frame: Stored in the temporal filter context as the output buffer.
// Returns:
//   Nothing. A failure to allocate the filter buffers is reported through
//   aom_internal_error() with AOM_CODEC_MEM_ERROR.
void av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
                         int gf_frame_index, FRAME_DIFF *frame_diff,
                         YV12_BUFFER_CONFIG *output_frame) {
  // TODO(anyone): Currently, we enforce the filtering strength on internal
  // ARFs except the second ARF to be zero. We should investigate in which case
  // it is more beneficial to use non-zero strength filtering.
  // Only parallel level 0 frames go through temporal filtering.
  assert(cpi->ppi->gf_group.frame_parallel_level[gf_frame_index] == 0);

  // The frame difference is only reported when the caller asks for it.
  const int want_frame_diff = (frame_diff != NULL);

  // Populate cpi->tf_ctx for this frame.
  init_tf_ctx(cpi, filter_frame_lookahead_idx, gf_frame_index, want_frame_diff,
              output_frame);

  // Allocate and reset the working buffers used by the filter.
  const TemporalFilterCtx *const ctx = &cpi->tf_ctx;
  TemporalFilterData *const data = &cpi->td.tf_data;
  const int highbd = ctx->is_highbitdepth;
  if (!tf_alloc_and_reset_data(data, ctx->num_pels, highbd)) {
    aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
                       "Error allocating temporal filter data");
  }

  // Run the filter, multi-threaded when more than one worker is available.
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
  if (mt_info->num_workers <= 1) {
    tf_do_filtering(cpi);
  } else {
    av1_tf_do_filtering_mt(cpi);
  }

  if (want_frame_diff) {
    *frame_diff = data->diff;
  }

  // Release the working buffers.
  tf_dealloc_data(data, highbd);
}
1454 
av1_is_temporal_filter_on(const AV1EncoderConfig * oxcf)1455 int av1_is_temporal_filter_on(const AV1EncoderConfig *oxcf) {
1456   return oxcf->algo_cfg.arnr_max_frames > 0 && oxcf->gf_cfg.lag_in_frames > 1;
1457 }
1458 
// Records whether temporal filtering is enabled and, if so, (re)allocates the
// TF_INFO_BUF_COUNT frame buffers in tf_info->tf_buf at the configured frame
// dimensions.
// Inputs:
//   tf_info: Temporal filter info structure to set up.
//   cpi: Top level encoder instance structure (read only).
// Returns:
//   true on success, or when temporal filtering is off and nothing needs to
//   be allocated; false as soon as one buffer allocation fails.
bool av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, const AV1_COMP *cpi) {
  const AV1EncoderConfig *const cfg = &cpi->oxcf;

  tf_info->is_temporal_filter_on = av1_is_temporal_filter_on(cfg);
  if (!tf_info->is_temporal_filter_on) return true;

  const AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq = cm->seq_params;
  for (int buf = 0; buf < TF_INFO_BUF_COUNT; ++buf) {
    const int alloc_failed = aom_realloc_frame_buffer(
        &tf_info->tf_buf[buf], cfg->frm_dim_cfg.width, cfg->frm_dim_cfg.height,
        seq->subsampling_x, seq->subsampling_y, seq->use_highbitdepth,
        cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
        NULL, cpi->alloc_pyramid, 0);
    if (alloc_failed) return false;
  }
  return true;
}
1478 
// Frees the frame buffers owned by `tf_info`: every entry of tf_buf and the
// tf_buf_second_arf buffer. Does nothing when temporal filtering is off.
// Inputs:
//   tf_info: Temporal filter info structure whose buffers are released.
void av1_tf_info_free(TEMPORAL_FILTER_INFO *tf_info) {
  if (tf_info->is_temporal_filter_on != 0) {
    for (int buf = 0; buf < TF_INFO_BUF_COUNT; ++buf) {
      aom_free_frame_buffer(&tf_info->tf_buf[buf]);
    }
    aom_free_frame_buffer(&tf_info->tf_buf_second_arf);
  }
}
1486 
// Clears the bookkeeping of the filtered-frame buffers: the valid flags, the
// GF indices and the display index offsets are all zeroed. The frame buffers
// themselves are not touched.
// Inputs:
//   tf_info: Temporal filter info structure to reset.
void av1_tf_info_reset(TEMPORAL_FILTER_INFO *tf_info) {
  av1_zero(tf_info->tf_buf_display_index_offset);
  av1_zero(tf_info->tf_buf_gf_index);
  av1_zero(tf_info->tf_buf_valid);
}
1492 
// Produces the temporally filtered frames for the KF_UPDATE and ARF_UPDATE
// entries of a GF group and caches them in `tf_info`.
//
// The buffer slot is 1 when the entry's frame type is INTER_FRAME and 0
// otherwise. This function is designed to be called multiple times after
// av1_tf_info_reset(): a slot that is already valid for the same lookahead
// index is left untouched, so only missing filtered frames are generated.
// Inputs:
//   tf_info: Temporal filter info structure holding the cached buffers.
//   cpi: Top level encoder instance structure.
//   gf_group: GF group whose entries are scanned.
void av1_tf_info_filtering(TEMPORAL_FILTER_INFO *tf_info, AV1_COMP *cpi,
                           const GF_GROUP *gf_group) {
  if (tf_info->is_temporal_filter_on == 0) return;

  const AV1_COMMON *const cm = &cpi->common;
  for (int gf_index = 0; gf_index < gf_group->size; ++gf_index) {
    const int update_type = gf_group->update_type[gf_index];
    if (update_type != KF_UPDATE && update_type != ARF_UPDATE) continue;

    const int slot = gf_group->frame_type[gf_index] == INTER_FRAME;
    const int lookahead_idx =
        gf_group->arf_src_offset[gf_index] + gf_group->cur_frame_idx[gf_index];

    // Skip the work when this slot already holds the frame we need.
    const int already_filtered =
        tf_info->tf_buf_valid[slot] != 0 &&
        tf_info->tf_buf_display_index_offset[slot] == lookahead_idx;
    if (already_filtered) continue;

    YV12_BUFFER_CONFIG *const filtered = &tf_info->tf_buf[slot];
    av1_temporal_filter(cpi, lookahead_idx, gf_index,
                        &tf_info->frame_diff[slot], filtered);
    aom_extend_frame_borders(filtered, av1_num_planes(cm));

    // Remember which frame the slot now holds.
    tf_info->tf_buf_gf_index[slot] = gf_index;
    tf_info->tf_buf_display_index_offset[slot] = lookahead_idx;
    tf_info->tf_buf_valid[slot] = 1;
  }
}
1519 
av1_tf_info_get_filtered_buf(TEMPORAL_FILTER_INFO * tf_info,int gf_index,FRAME_DIFF * frame_diff)1520 YV12_BUFFER_CONFIG *av1_tf_info_get_filtered_buf(TEMPORAL_FILTER_INFO *tf_info,
1521                                                  int gf_index,
1522                                                  FRAME_DIFF *frame_diff) {
1523   if (tf_info->is_temporal_filter_on == 0) return NULL;
1524   YV12_BUFFER_CONFIG *out_buf = NULL;
1525   for (int i = 0; i < TF_INFO_BUF_COUNT; ++i) {
1526     if (tf_info->tf_buf_valid[i] && tf_info->tf_buf_gf_index[i] == gf_index) {
1527       out_buf = &tf_info->tf_buf[i];
1528       *frame_diff = tf_info->frame_diff[i];
1529     }
1530   }
1531   return out_buf;
1532 }
1533 /*!\endcond */
1534