1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <float.h>
13 #include <math.h>
14 #include <limits.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_scale_rtcd.h"
18
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_dsp/mathutils.h"
21 #include "aom_dsp/odintrin.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_scale/aom_scale.h"
26 #include "av1/common/alloccommon.h"
27 #include "av1/common/av1_common_int.h"
28 #include "av1/common/quant_common.h"
29 #include "av1/common/reconinter.h"
30 #include "av1/encoder/av1_quantize.h"
31 #include "av1/encoder/encodeframe.h"
32 #include "av1/encoder/encoder.h"
33 #include "av1/encoder/ethread.h"
34 #include "av1/encoder/extend.h"
35 #include "av1/encoder/firstpass.h"
36 #include "av1/encoder/gop_structure.h"
37 #include "av1/encoder/intra_mode_search_utils.h"
38 #include "av1/encoder/mcomp.h"
39 #include "av1/encoder/motion_search_facade.h"
40 #include "av1/encoder/pass2_strategy.h"
41 #include "av1/encoder/ratectrl.h"
42 #include "av1/encoder/reconinter_enc.h"
43 #include "av1/encoder/segmentation.h"
44 #include "av1/encoder/temporal_filter.h"
45
46 /*!\cond */
47
48 // NOTE: All `tf` in this file means `temporal filtering`.
49
// Forward declaration: tf_motion_search() calls this helper before its
// definition appears later in this file.
static void tf_determine_block_partition(const MV block_mv, const int block_mse,
                                         MV *subblock_mvs, int *subblock_mses);
53
54 // This function returns the minimum and maximum log variances for 4x4 sub
55 // blocks in the current block.
get_log_var_4x4sub_blk(AV1_COMP * cpi,const YV12_BUFFER_CONFIG * const frame_to_filter,int mb_row,int mb_col,BLOCK_SIZE block_size,double * blk_4x4_var_min,double * blk_4x4_var_max,int is_hbd)56 static INLINE void get_log_var_4x4sub_blk(
57 AV1_COMP *cpi, const YV12_BUFFER_CONFIG *const frame_to_filter, int mb_row,
58 int mb_col, BLOCK_SIZE block_size, double *blk_4x4_var_min,
59 double *blk_4x4_var_max, int is_hbd) {
60 const int mb_height = block_size_high[block_size];
61 const int mb_width = block_size_wide[block_size];
62 int var_min = INT_MAX;
63 int var_max = 0;
64
65 // Derive the source buffer.
66 const int src_stride = frame_to_filter->y_stride;
67 const int y_offset = mb_row * mb_height * src_stride + mb_col * mb_width;
68 const uint8_t *src_buf = frame_to_filter->y_buffer + y_offset;
69
70 for (int i = 0; i < mb_height; i += MI_SIZE) {
71 for (int j = 0; j < mb_width; j += MI_SIZE) {
72 // Calculate the 4x4 sub-block variance.
73 const int var = av1_calc_normalized_variance(
74 cpi->ppi->fn_ptr[BLOCK_4X4].vf, src_buf + (i * src_stride) + j,
75 src_stride, is_hbd);
76
77 // Record min and max for over-arching block
78 var_min = AOMMIN(var_min, var);
79 var_max = AOMMAX(var_max, var);
80 }
81 }
82
83 *blk_4x4_var_min = log1p(var_min / 16.0);
84 *blk_4x4_var_max = log1p(var_max / 16.0);
85 }
86
87 /*!\endcond */
/*!\brief Does motion search for blocks in temporal filtering. This is
 *  the first step for temporal filtering. More specifically, given a frame to
 * be filtered and another frame as reference, this function searches the
 * reference frame to find out the most similar block as that from the frame
 * to be filtered. This found block will be further used for weighted
 * averaging.
 *
 * NOTE: Besides doing motion search for the entire block, this function can
 *       also do motion search for each 1/4 sub-block to get more precise
 *       predictions. The sub-block search only runs when
 *       `allow_me_for_sub_blks` is set and integer motion vectors are not
 *       forced for the current frame. Then, this function determines whether
 *       to use 4 sub-blocks to replace the entire block. If we do need to
 *       split the entire block, 4 elements in `subblock_mvs` and
 *       `subblock_mses` refer to the searched motion vector and search error
 *       (MSE) w.r.t. each sub-block respectively. Otherwise, the 4 elements
 *       will be the same, all of which are assigned as the searched motion
 *       vector and search error (MSE) for the entire block.
 *
 * \ingroup src_frame_proc
 * \param[in]     cpi                   Top level encoder instance structure
 * \param[in]     mb                    Pointer to macroblock. Its plane-0
 *                                      source and prediction buffers are
 *                                      temporarily redirected during the
 *                                      search and restored before returning.
 * \param[in]     frame_to_filter       Pointer to the frame to be filtered
 * \param[in]     ref_frame             Pointer to the reference frame
 * \param[in]     block_size            Block size used for motion search
 * \param[in]     mb_row                Row index of the block in the frame
 * \param[in]     mb_col                Column index of the block in the frame
 * \param[in,out] ref_mv                Reference motion vector, which is
 *                                      commonly inherited from the motion
 *                                      search result of previous frame. It
 *                                      seeds the full-pixel search. On the
 *                                      fractional-search path it is
 *                                      overwritten with the refined
 *                                      whole-block motion vector, and on
 *                                      either path it is reset to zero when
 *                                      the whole-block MSE exceeds the
 *                                      internal threshold.
 * \param[in]     allow_me_for_sub_blks Flag to indicate whether motion search
 *                                      at 16x16 sub-block level is needed or
 *                                      not.
 * \param[out]    subblock_mvs          Pointer to the motion vectors for
 *                                      4 sub-blocks
 * \param[out]    subblock_mses         Pointer to the search errors (MSE) for
 *                                      4 sub-blocks
 *
 * \remark Nothing will be returned. Results are saved in subblock_mvs and
 *         subblock_mses
 */
static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
                             const YV12_BUFFER_CONFIG *frame_to_filter,
                             const YV12_BUFFER_CONFIG *ref_frame,
                             const BLOCK_SIZE block_size, const int mb_row,
                             const int mb_col, MV *ref_mv,
                             bool allow_me_for_sub_blks, MV *subblock_mvs,
                             int *subblock_mses) {
  // Frame information
  const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);

  // Block information (ONLY Y-plane is used for motion search).
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int y_stride = frame_to_filter->y_stride;
  const int src_width = frame_to_filter->y_width;
  const int ref_width = ref_frame->y_width;
  // A single stride/offset is used for both frames below, so they must share
  // the same luma layout.
  assert(y_stride == ref_frame->y_stride);
  assert(src_width == ref_width);
  // Offset of the block's top-left luma sample within either frame.
  const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;

  // Save input state. The plane-0 source/prediction buffers of `mb` are
  // redirected below and put back before the partition decision.
  MACROBLOCKD *const mbd = &mb->e_mbd;
  const struct buf_2d ori_src_buf = mb->plane[0].src;
  const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];

  // Parameters used for motion search.
  FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
  SUBPEL_MOTION_SEARCH_PARAMS ms_params;
  const int step_param = av1_init_search_range(
      AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
  const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
  const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
  // The motion vector cost model is picked from the smaller frame dimension.
  const MV_COST_TYPE mv_cost_type =
      min_frame_size >= 720
          ? MV_COST_L1_HDRES
          : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);

  // Starting position for motion search.
  FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
  // Baseline position for motion search (used for rate distortion comparison).
  const MV baseline_mv = kZeroMv;

  // Setup: point the source at the block to be filtered and the prediction at
  // the co-located block of the reference frame.
  mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
  mb->plane[0].src.stride = y_stride;
  mb->plane[0].src.width = src_width;
  mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
  mbd->plane[0].pre[0].stride = y_stride;
  mbd->plane[0].pre[0].width = ref_width;

  const SEARCH_METHODS search_method = NSTEP;
  const search_site_config *search_site_cfg =
      av1_get_search_site_config(cpi, mb, search_method);

  // Scratch outputs of the search calls. Only `error` is consumed below;
  // `sse`, `distortion` and `cost_list` are filled in but never read here.
  unsigned int sse, error;
  int distortion;
  int cost_list[5];

  // Do motion search.
  int_mv best_mv;  // Searched motion vector.
  FULLPEL_MV_STATS best_mv_stats;
  int block_mse = INT_MAX;
  MV block_mv = kZeroMv;
  const int q = av1_get_q(cpi);

  av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
                                     &baseline_mv, start_mv, search_site_cfg,
                                     search_method,
                                     /*fine_search_interval=*/0);
  full_ms_params.run_mesh_search = 1;
  full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;

  if (cpi->sf.mv_sf.prune_mesh_search == PRUNE_MESH_SEARCH_LVL_1) {
    // Enable prune_mesh_search based on q for PRUNE_MESH_SEARCH_LVL_1.
    full_ms_params.prune_mesh_search = (q <= 20) ? 0 : 1;
    full_ms_params.mesh_search_mv_diff_threshold = 2;
  }

  av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                        cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
                        &best_mv_stats, NULL);

  if (force_integer_mv == 1) {  // Only do full search on the entire block.
    // `best_mv` was just written through `as_fullmv` and is read back through
    // `as_mv`; the values are used as whole-pixel displacements for
    // `mv_offset` before being converted with GET_MV_SUBPEL().
    // NOTE(review): presumably int_mv is a union so both views alias the same
    // storage - confirm against its declaration.
    const int mv_row = best_mv.as_mv.row;
    const int mv_col = best_mv.as_mv.col;
    best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
    best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
    const int mv_offset = mv_row * y_stride + mv_col;
    error = cpi->ppi->fn_ptr[block_size].vf(
        ref_frame->y_buffer + y_offset + mv_offset, y_stride,
        frame_to_filter->y_buffer + y_offset, y_stride, &sse);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    block_mv = best_mv.as_mv;
  } else {  // Do fractional search on the entire block and all sub-blocks.
    av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
                                      &baseline_mv, cost_list);
    ms_params.forced_stop = EIGHTH_PEL;
    ms_params.var_params.subpel_search_type = subpel_search_type;
    // Since we are merely refining the result from full pixel search, we don't
    // need regularization for subpel search
    ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
    best_mv_stats.err_cost = 0;

    MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv));
    error = cpi->mv_search_params.find_fractional_mv_step(
        &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv_stats,
        &best_mv.as_mv, &distortion, &sse, NULL);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    block_mv = best_mv.as_mv;
    // Hand the refined whole-block vector back to the caller. It may still be
    // reset to zero by the error threshold check at the end of this function.
    *ref_mv = best_mv.as_mv;

    if (allow_me_for_sub_blks) {
      // On 4 sub-blocks.
      const BLOCK_SIZE subblock_size = av1_ss_size_lookup[block_size][1][1];
      const int subblock_height = block_size_high[subblock_size];
      const int subblock_width = block_size_wide[subblock_size];
      const int subblock_pels = subblock_height * subblock_width;
      // Every sub-block search starts from the whole-block result.
      start_mv = get_fullmv_from_mv(ref_mv);

      // Sub-blocks are visited, and their results stored, in row-major order.
      int subblock_idx = 0;
      for (int i = 0; i < mb_height; i += subblock_height) {
        for (int j = 0; j < mb_width; j += subblock_width) {
          const int offset = i * y_stride + j;
          mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
          mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
          av1_make_default_fullpel_ms_params(
              &full_ms_params, cpi, mb, subblock_size, &baseline_mv, start_mv,
              search_site_cfg, search_method,
              /*fine_search_interval=*/0);
          full_ms_params.run_mesh_search = 1;
          full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;

          if (cpi->sf.mv_sf.prune_mesh_search == PRUNE_MESH_SEARCH_LVL_1) {
            // Enable prune_mesh_search based on q for PRUNE_MESH_SEARCH_LVL_1.
            full_ms_params.prune_mesh_search = (q <= 20) ? 0 : 1;
            full_ms_params.mesh_search_mv_diff_threshold = 2;
          }
          av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                                cond_cost_list(cpi, cost_list),
                                &best_mv.as_fullmv, &best_mv_stats, NULL);

          av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
                                            &baseline_mv, cost_list);
          ms_params.forced_stop = EIGHTH_PEL;
          ms_params.var_params.subpel_search_type = subpel_search_type;
          // Since we are merely refining the result from full pixel search, we
          // don't need regularization for subpel search
          ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
          best_mv_stats.err_cost = 0;

          subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
          assert(
              av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv));
          error = cpi->mv_search_params.find_fractional_mv_step(
              &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
              &best_mv_stats, &best_mv.as_mv, &distortion, &sse, NULL);
          subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
          subblock_mvs[subblock_idx] = best_mv.as_mv;
          ++subblock_idx;
        }
      }
    }
  }

  // Restore input state.
  mb->plane[0].src = ori_src_buf;
  mbd->plane[0].pre[0] = ori_pre_buf;

  // Make partition decision.
  // NOTE(review): on the forced-integer path no sub-block search has run, so
  // with `allow_me_for_sub_blks` set the call below reads whatever the caller
  // left in `subblock_mvs` / `subblock_mses` - confirm the caller initializes
  // both arrays.
  if (allow_me_for_sub_blks) {
    tf_determine_block_partition(block_mv, block_mse, subblock_mvs,
                                 subblock_mses);
  } else {
    // Copy 32X32 block mv and mse values to sub blocks
    for (int i = 0; i < 4; ++i) {
      subblock_mvs[i] = block_mv;
      subblock_mses[i] = block_mse;
    }
  }
  // Do not pass down the reference motion vector if error is too large. The
  // threshold is scaled up by one bit per bit of depth above 8.
  const int thresh = (min_frame_size >= 720) ? 12 : 3;
  if (block_mse > (thresh << (mbd->bd - 8))) {
    *ref_mv = kZeroMv;
  }
}
314 /*!\cond */
315
316 // Determines whether to split the entire block to 4 sub-blocks for filtering.
317 // In particular, this decision is made based on the comparison between the
318 // motion search error of the entire block and the errors of all sub-blocks.
319 // Inputs:
320 // block_mv: Motion vector for the entire block (ONLY as reference).
321 // block_mse: Motion search error (MSE) for the entire block (ONLY as
322 // reference).
323 // subblock_mvs: Pointer to the motion vectors for 4 sub-blocks (will be
324 // modified based on the partition decision).
325 // subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks (will
326 // be modified based on the partition decision).
327 // Returns:
328 // Nothing will be returned. Results are saved in `subblock_mvs` and
329 // `subblock_mses`.
tf_determine_block_partition(const MV block_mv,const int block_mse,MV * subblock_mvs,int * subblock_mses)330 static void tf_determine_block_partition(const MV block_mv, const int block_mse,
331 MV *subblock_mvs, int *subblock_mses) {
332 int min_subblock_mse = INT_MAX;
333 int max_subblock_mse = INT_MIN;
334 int64_t sum_subblock_mse = 0;
335 for (int i = 0; i < 4; ++i) {
336 sum_subblock_mse += subblock_mses[i];
337 min_subblock_mse = AOMMIN(min_subblock_mse, subblock_mses[i]);
338 max_subblock_mse = AOMMAX(max_subblock_mse, subblock_mses[i]);
339 }
340
341 // TODO(any): The following magic numbers may be tuned to improve the
342 // performance OR find a way to get rid of these magic numbers.
343 if (((block_mse * 15 < sum_subblock_mse * 4) &&
344 max_subblock_mse - min_subblock_mse < 48) ||
345 ((block_mse * 14 < sum_subblock_mse * 4) &&
346 max_subblock_mse - min_subblock_mse < 24)) { // No split.
347 for (int i = 0; i < 4; ++i) {
348 subblock_mvs[i] = block_mv;
349 subblock_mses[i] = block_mse;
350 }
351 }
352 }
353
354 // Helper function to determine whether a frame is encoded with high bit-depth.
is_frame_high_bitdepth(const YV12_BUFFER_CONFIG * frame)355 static INLINE int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
356 return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
357 }
358
359 /*!\endcond */
360 /*!\brief Builds predictor for blocks in temporal filtering. This is the
361 * second step for temporal filtering, which is to construct predictions from
362 * all reference frames INCLUDING the frame to be filtered itself. These
363 * predictors are built based on the motion search results (motion vector is
364 * set as 0 for the frame to be filtered), and will be futher used for
365 * weighted averaging.
366 *
367 * \ingroup src_frame_proc
368 * \param[in] ref_frame Pointer to the reference frame (or the frame
369 * to be filtered)
370 * \param[in] mbd Pointer to the block for filtering. Besides
371 * containing the subsampling information of all
372 * planes, this field also gives the searched
373 * motion vector for the entire block, i.e.,
374 * `mbd->mi[0]->mv[0]`. This vector should be 0
375 * if the `ref_frame` itself is the frame to be
376 * filtered.
377 * \param[in] block_size Size of the block
378 * \param[in] mb_row Row index of the block in the frame
379 * \param[in] mb_col Column index of the block in the frame
380 * \param[in] num_planes Number of planes in the frame
381 * \param[in] scale Scaling factor
382 * \param[in] subblock_mvs The motion vectors for each sub-block (row-major
383 * order)
384 * \param[out] pred Pointer to the predictor to be built
385 *
386 * \remark Nothing returned, But the contents of `pred` will be modified
387 */
tf_build_predictor(const YV12_BUFFER_CONFIG * ref_frame,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const struct scale_factors * scale,const MV * subblock_mvs,uint8_t * pred)388 static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
389 const MACROBLOCKD *mbd,
390 const BLOCK_SIZE block_size, const int mb_row,
391 const int mb_col, const int num_planes,
392 const struct scale_factors *scale,
393 const MV *subblock_mvs, uint8_t *pred) {
394 // Information of the entire block.
395 const int mb_height = block_size_high[block_size]; // Height.
396 const int mb_width = block_size_wide[block_size]; // Width.
397 const int mb_y = mb_height * mb_row; // Y-coord (Top-left).
398 const int mb_x = mb_width * mb_col; // X-coord (Top-left).
399 const int bit_depth = mbd->bd; // Bit depth.
400 const int is_intrabc = 0; // Is intra-copied?
401 const int is_high_bitdepth = is_frame_high_bitdepth(ref_frame);
402
403 // Default interpolation filters.
404 const int_interpfilters interp_filters =
405 av1_broadcast_interp_filter(MULTITAP_SHARP2);
406
407 // Handle Y-plane, U-plane and V-plane (if needed) in sequence.
408 int plane_offset = 0;
409 for (int plane = 0; plane < num_planes; ++plane) {
410 const int subsampling_y = mbd->plane[plane].subsampling_y;
411 const int subsampling_x = mbd->plane[plane].subsampling_x;
412 // Information of each sub-block in current plane.
413 const int plane_h = mb_height >> subsampling_y; // Plane height.
414 const int plane_w = mb_width >> subsampling_x; // Plane width.
415 const int plane_y = mb_y >> subsampling_y; // Y-coord (Top-left).
416 const int plane_x = mb_x >> subsampling_x; // X-coord (Top-left).
417 const int h = plane_h >> 1; // Sub-block height.
418 const int w = plane_w >> 1; // Sub-block width.
419 const int is_y_plane = (plane == 0); // Is Y-plane?
420
421 const struct buf_2d ref_buf = { NULL, ref_frame->buffers[plane],
422 ref_frame->widths[is_y_plane ? 0 : 1],
423 ref_frame->heights[is_y_plane ? 0 : 1],
424 ref_frame->strides[is_y_plane ? 0 : 1] };
425
426 // Handle each subblock.
427 int subblock_idx = 0;
428 for (int i = 0; i < plane_h; i += h) {
429 for (int j = 0; j < plane_w; j += w) {
430 // Choose proper motion vector.
431 const MV mv = subblock_mvs[subblock_idx++];
432 assert(mv.row >= INT16_MIN && mv.row <= INT16_MAX &&
433 mv.col >= INT16_MIN && mv.col <= INT16_MAX);
434
435 const int y = plane_y + i;
436 const int x = plane_x + j;
437
438 // Build predictior for each sub-block on current plane.
439 InterPredParams inter_pred_params;
440 av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
441 subsampling_y, bit_depth, is_high_bitdepth,
442 is_intrabc, scale, &ref_buf, interp_filters);
443 inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
444 av1_enc_build_one_inter_predictor(&pred[plane_offset + i * plane_w + j],
445 plane_w, &mv, &inter_pred_params);
446 }
447 }
448 plane_offset += plane_h * plane_w;
449 }
450 }
451 /*!\cond */
452
453 // Computes temporal filter weights and accumulators for the frame to be
454 // filtered. More concretely, the filter weights for all pixels are the same.
455 // Inputs:
456 // mbd: Pointer to the block for filtering, which is ONLY used to get
457 // subsampling information of all planes as well as the bit-depth.
458 // block_size: Size of the block.
459 // num_planes: Number of planes in the frame.
460 // pred: Pointer to the well-built predictors.
461 // accum: Pointer to the pixel-wise accumulator for filtering.
462 // count: Pointer to the pixel-wise counter fot filtering.
463 // Returns:
464 // Nothing will be returned. But the content to which `accum` and `pred`
465 // point will be modified.
tf_apply_temporal_filter_self(const YV12_BUFFER_CONFIG * ref_frame,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,uint32_t * accum,uint16_t * count)466 void tf_apply_temporal_filter_self(const YV12_BUFFER_CONFIG *ref_frame,
467 const MACROBLOCKD *mbd,
468 const BLOCK_SIZE block_size,
469 const int mb_row, const int mb_col,
470 const int num_planes, uint32_t *accum,
471 uint16_t *count) {
472 // Block information.
473 const int mb_height = block_size_high[block_size];
474 const int mb_width = block_size_wide[block_size];
475 const int is_high_bitdepth = is_cur_buf_hbd(mbd);
476
477 int plane_offset = 0;
478 for (int plane = 0; plane < num_planes; ++plane) {
479 const int subsampling_y = mbd->plane[plane].subsampling_y;
480 const int subsampling_x = mbd->plane[plane].subsampling_x;
481 const int h = mb_height >> subsampling_y; // Plane height.
482 const int w = mb_width >> subsampling_x; // Plane width.
483
484 const int frame_stride = ref_frame->strides[plane == AOM_PLANE_Y ? 0 : 1];
485 const uint8_t *buf8 = ref_frame->buffers[plane];
486 const uint16_t *buf16 = CONVERT_TO_SHORTPTR(buf8);
487 const int frame_offset = mb_row * h * frame_stride + mb_col * w;
488
489 int pred_idx = 0;
490 int pixel_idx = 0;
491 for (int i = 0; i < h; ++i) {
492 for (int j = 0; j < w; ++j) {
493 const int idx = plane_offset + pred_idx; // Index with plane shift.
494 const int pred_value = is_high_bitdepth
495 ? buf16[frame_offset + pixel_idx]
496 : buf8[frame_offset + pixel_idx];
497 accum[idx] += TF_WEIGHT_SCALE * pred_value;
498 count[idx] += TF_WEIGHT_SCALE;
499 ++pred_idx;
500 ++pixel_idx;
501 }
502 pixel_idx += (frame_stride - w);
503 }
504 plane_offset += h * w;
505 }
506 }
507
508 // Function to compute pixel-wise squared difference between two buffers.
509 // Inputs:
510 // ref: Pointer to reference buffer.
511 // ref_offset: Start position of reference buffer for computation.
512 // ref_stride: Stride for reference buffer.
513 // tgt: Pointer to target buffer.
514 // tgt_offset: Start position of target buffer for computation.
515 // tgt_stride: Stride for target buffer.
516 // height: Height of block for computation.
517 // width: Width of block for computation.
518 // is_high_bitdepth: Whether the two buffers point to high bit-depth frames.
519 // square_diff: Pointer to save the squared differces.
520 // Returns:
521 // Nothing will be returned. But the content to which `square_diff` points
522 // will be modified.
compute_square_diff(const uint8_t * ref,const int ref_offset,const int ref_stride,const uint8_t * tgt,const int tgt_offset,const int tgt_stride,const int height,const int width,const int is_high_bitdepth,uint32_t * square_diff)523 static INLINE void compute_square_diff(const uint8_t *ref, const int ref_offset,
524 const int ref_stride, const uint8_t *tgt,
525 const int tgt_offset,
526 const int tgt_stride, const int height,
527 const int width,
528 const int is_high_bitdepth,
529 uint32_t *square_diff) {
530 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
531 const uint16_t *tgt16 = CONVERT_TO_SHORTPTR(tgt);
532
533 int ref_idx = 0;
534 int tgt_idx = 0;
535 int idx = 0;
536 for (int i = 0; i < height; ++i) {
537 for (int j = 0; j < width; ++j) {
538 const uint16_t ref_value = is_high_bitdepth ? ref16[ref_offset + ref_idx]
539 : ref[ref_offset + ref_idx];
540 const uint16_t tgt_value = is_high_bitdepth ? tgt16[tgt_offset + tgt_idx]
541 : tgt[tgt_offset + tgt_idx];
542 const uint32_t diff = (ref_value > tgt_value) ? (ref_value - tgt_value)
543 : (tgt_value - ref_value);
544 square_diff[idx] = diff * diff;
545
546 ++ref_idx;
547 ++tgt_idx;
548 ++idx;
549 }
550 ref_idx += (ref_stride - width);
551 tgt_idx += (tgt_stride - width);
552 }
553 }
554
// Function to accumulate pixel-wise squared difference between two luma buffers
// to be consumed while filtering the chroma planes. Each output entry gathers
// the (1 << ss_y_shift) x (1 << ss_x_shift) luma entries that cover it.
// Inputs:
//   square_diff: Pointer to squared differences from luma plane, stored
//                row-major with a row pitch of `block_width << ss_x_shift`.
//   luma_sse_sum: Pointer to save the sum of luma squared differences, stored
//                 row-major with a row pitch of `block_width`. Sums are ADDED
//                 to the existing contents.
//   block_height: Height of block for computation.
//   block_width: Width of block for computation.
//   ss_x_shift: Chroma subsampling shift in 'X' direction
//   ss_y_shift: Chroma subsampling shift in 'Y' direction
// Returns:
//   Nothing will be returned. But the content to which `luma_sse_sum` points
//   will be modified.
void compute_luma_sq_error_sum(uint32_t *square_diff, uint32_t *luma_sse_sum,
                               int block_height, int block_width,
                               int ss_x_shift, int ss_y_shift) {
  // Nothing to accumulate for an empty block.
  if (block_height <= 0 || block_width <= 0) return;

  const int luma_pitch = block_width << ss_x_shift;  // Width of Y-plane.
  const int luma_rows_per_entry = 1 << ss_y_shift;
  const int luma_cols_per_entry = 1 << ss_x_shift;

  for (int row = 0; row < block_height; ++row) {
    const int luma_top = row << ss_y_shift;  // First covering Y-plane row.
    for (int col = 0; col < block_width; ++col) {
      const int luma_left = col << ss_x_shift;  // First covering Y-plane col.
      const int out_idx = row * block_width + col;

      for (int dy = 0; dy < luma_rows_per_entry; ++dy) {
        const int luma_row_start = (luma_top + dy) * luma_pitch;
        for (int dx = 0; dx < luma_cols_per_entry; ++dx) {
          luma_sse_sum[out_idx] += square_diff[luma_row_start + luma_left + dx];
        }
      }
    }
  }
}
583
584 /*!\endcond */
585 /*!\brief Applies temporal filtering. NOTE that there are various optimised
586 * versions of this function called where the appropriate instruction set is
587 * supported.
588 *
589 * \ingroup src_frame_proc
590 * \param[in] frame_to_filter Pointer to the frame to be filtered, which is
591 * used as reference to compute squared
592 * difference from the predictor.
593 * \param[in] mbd Pointer to the block for filtering, ONLY used
594 * to get subsampling information for the planes
595 * \param[in] block_size Size of the block
596 * \param[in] mb_row Row index of the block in the frame
597 * \param[in] mb_col Column index of the block in the frame
598 * \param[in] num_planes Number of planes in the frame
599 * \param[in] noise_levels Estimated noise levels for each plane
600 * in the frame (Y,U,V)
601 * \param[in] subblock_mvs Pointer to the motion vectors for 4 sub-blocks
602 * \param[in] subblock_mses Pointer to the search errors (MSE) for 4
603 * sub-blocks
604 * \param[in] q_factor Quantization factor. This is actually the `q`
605 * defined in libaom, converted from `qindex`
606 * \param[in] filter_strength Filtering strength. This value lies in range
607 * [0, 6] where 6 is the maximum strength.
608 * \param[in] tf_wgt_calc_lvl Controls the weight calculation method during
609 * temporal filtering
 * \param[in]   pred              Pointer to the well-built predictors
611 * \param[out] accum Pointer to the pixel-wise accumulator for
612 * filtering
613 * \param[out] count Pointer to the pixel-wise counter for
614 * filtering
615 *
 * \remark Nothing returned, But the contents of `accum` and `count`
 *         will be modified
618 */
av1_apply_temporal_filter_c(const YV12_BUFFER_CONFIG * frame_to_filter,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const double * noise_levels,const MV * subblock_mvs,const int * subblock_mses,const int q_factor,const int filter_strength,int tf_wgt_calc_lvl,const uint8_t * pred,uint32_t * accum,uint16_t * count)619 void av1_apply_temporal_filter_c(
620 const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
621 const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
622 const int num_planes, const double *noise_levels, const MV *subblock_mvs,
623 const int *subblock_mses, const int q_factor, const int filter_strength,
624 int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum,
625 uint16_t *count) {
626 // Block information.
627 const int mb_height = block_size_high[block_size];
628 const int mb_width = block_size_wide[block_size];
629 const int mb_pels = mb_height * mb_width;
630 const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
631 const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
632 // Frame information.
633 const int frame_height = frame_to_filter->y_crop_height;
634 const int frame_width = frame_to_filter->y_crop_width;
635 const int min_frame_size = AOMMIN(frame_height, frame_width);
636 // Variables to simplify combined error calculation.
637 const double inv_factor = 1.0 / ((TF_WINDOW_BLOCK_BALANCE_WEIGHT + 1) *
638 TF_SEARCH_ERROR_NORM_WEIGHT);
639 const double weight_factor =
640 (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
641 // Decay factors for non-local mean approach.
642 double decay_factor[MAX_MB_PLANE] = { 0 };
643 // Adjust filtering based on q.
644 // Larger q -> stronger filtering -> larger weight.
645 // Smaller q -> weaker filtering -> smaller weight.
646 double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
647 q_decay = CLIP(q_decay, 1e-5, 1);
648 if (q_factor >= TF_QINDEX_CUTOFF) {
649 // Max q_factor is 255, therefore the upper bound of q_decay is 8.
650 // We do not need a clip here.
651 q_decay = 0.5 * pow((double)q_factor / 64, 2);
652 }
653 // Smaller strength -> smaller filtering weight.
654 double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
655 s_decay = CLIP(s_decay, 1e-5, 1);
656 for (int plane = 0; plane < num_planes; plane++) {
657 // Larger noise -> larger filtering weight.
658 const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
659 decay_factor[plane] = 1 / (n_decay * q_decay * s_decay);
660 }
661 double d_factor[4] = { 0 };
662 for (int subblock_idx = 0; subblock_idx < 4; subblock_idx++) {
663 // Larger motion vector -> smaller filtering weight.
664 const MV mv = subblock_mvs[subblock_idx];
665 const double distance = sqrt(pow(mv.row, 2) + pow(mv.col, 2));
666 double distance_threshold = min_frame_size * TF_SEARCH_DISTANCE_THRESHOLD;
667 distance_threshold = AOMMAX(distance_threshold, 1);
668 d_factor[subblock_idx] = distance / distance_threshold;
669 d_factor[subblock_idx] = AOMMAX(d_factor[subblock_idx], 1);
670 }
671
672 // Allocate memory for pixel-wise squared differences. They,
673 // regardless of the subsampling, are assigned with memory of size `mb_pels`.
674 uint32_t *square_diff = aom_memalign(16, mb_pels * sizeof(uint32_t));
675 if (!square_diff) {
676 aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
677 "Error allocating temporal filter data");
678 }
679 memset(square_diff, 0, mb_pels * sizeof(square_diff[0]));
680
681 // Allocate memory for accumulated luma squared error. This value will be
682 // consumed while filtering the chroma planes.
683 uint32_t *luma_sse_sum = aom_memalign(32, mb_pels * sizeof(uint32_t));
684 if (!luma_sse_sum) {
685 aom_free(square_diff);
686 aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
687 "Error allocating temporal filter data");
688 }
689 memset(luma_sse_sum, 0, mb_pels * sizeof(luma_sse_sum[0]));
690
691 // Get window size for pixel-wise filtering.
692 assert(TF_WINDOW_LENGTH % 2 == 1);
693 const int half_window = TF_WINDOW_LENGTH >> 1;
694
695 // Handle planes in sequence.
696 int plane_offset = 0;
697 for (int plane = 0; plane < num_planes; ++plane) {
698 // Locate pixel on reference frame.
699 const int subsampling_y = mbd->plane[plane].subsampling_y;
700 const int subsampling_x = mbd->plane[plane].subsampling_x;
701 const int h = mb_height >> subsampling_y; // Plane height.
702 const int w = mb_width >> subsampling_x; // Plane width.
703 const int frame_stride =
704 frame_to_filter->strides[plane == AOM_PLANE_Y ? 0 : 1];
705 const int frame_offset = mb_row * h * frame_stride + mb_col * w;
706 const uint8_t *ref = frame_to_filter->buffers[plane];
707 const int ss_y_shift =
708 subsampling_y - mbd->plane[AOM_PLANE_Y].subsampling_y;
709 const int ss_x_shift =
710 subsampling_x - mbd->plane[AOM_PLANE_Y].subsampling_x;
711 const int num_ref_pixels = TF_WINDOW_LENGTH * TF_WINDOW_LENGTH +
712 ((plane) ? (1 << (ss_x_shift + ss_y_shift)) : 0);
713 const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
714
715 // Filter U-plane and V-plane using Y-plane. This is because motion
716 // search is only done on Y-plane, so the information from Y-plane will
717 // be more accurate. The luma sse sum is reused in both chroma planes.
718 if (plane == AOM_PLANE_U)
719 compute_luma_sq_error_sum(square_diff, luma_sse_sum, h, w, ss_x_shift,
720 ss_y_shift);
721 compute_square_diff(ref, frame_offset, frame_stride, pred, plane_offset, w,
722 h, w, is_high_bitdepth, square_diff);
723
724 // Perform filtering.
725 int pred_idx = 0;
726 for (int i = 0; i < h; ++i) {
727 for (int j = 0; j < w; ++j) {
728 // non-local mean approach
729 uint64_t sum_square_diff = 0;
730
731 for (int wi = -half_window; wi <= half_window; ++wi) {
732 for (int wj = -half_window; wj <= half_window; ++wj) {
733 const int y = CLIP(i + wi, 0, h - 1); // Y-coord on current plane.
734 const int x = CLIP(j + wj, 0, w - 1); // X-coord on current plane.
735 sum_square_diff += square_diff[y * w + x];
736 }
737 }
738
739 sum_square_diff += luma_sse_sum[i * w + j];
740
741 // Scale down the difference for high bit depth input.
742 if (mbd->bd > 8) sum_square_diff >>= ((mbd->bd - 8) * 2);
743
744 // Combine window error and block error, and normalize it.
745 const double window_error = sum_square_diff * inv_num_ref_pixels;
746 const int subblock_idx = (i >= h / 2) * 2 + (j >= w / 2);
747 const double block_error = (double)subblock_mses[subblock_idx];
748 const double combined_error =
749 weight_factor * window_error + block_error * inv_factor;
750
751 // Compute filter weight.
752 double scaled_error =
753 combined_error * d_factor[subblock_idx] * decay_factor[plane];
754 scaled_error = AOMMIN(scaled_error, 7);
755 int weight;
756 if (tf_wgt_calc_lvl == 0) {
757 weight = (int)(exp(-scaled_error) * TF_WEIGHT_SCALE);
758 } else {
759 const float fweight =
760 approx_exp((float)-scaled_error) * TF_WEIGHT_SCALE;
761 weight = iroundpf(fweight);
762 }
763
764 const int idx = plane_offset + pred_idx; // Index with plane shift.
765 const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
766 accum[idx] += weight * pred_value;
767 count[idx] += weight;
768
769 ++pred_idx;
770 }
771 }
772 plane_offset += h * w;
773 }
774
775 aom_free(square_diff);
776 aom_free(luma_sse_sum);
777 }
778 #if CONFIG_AV1_HIGHBITDEPTH
779 // Calls High bit-depth temporal filter
av1_highbd_apply_temporal_filter_c(const YV12_BUFFER_CONFIG * frame_to_filter,const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const double * noise_levels,const MV * subblock_mvs,const int * subblock_mses,const int q_factor,const int filter_strength,int tf_wgt_calc_lvl,const uint8_t * pred,uint32_t * accum,uint16_t * count)780 void av1_highbd_apply_temporal_filter_c(
781 const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
782 const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
783 const int num_planes, const double *noise_levels, const MV *subblock_mvs,
784 const int *subblock_mses, const int q_factor, const int filter_strength,
785 int tf_wgt_calc_lvl, const uint8_t *pred, uint32_t *accum,
786 uint16_t *count) {
787 av1_apply_temporal_filter_c(frame_to_filter, mbd, block_size, mb_row, mb_col,
788 num_planes, noise_levels, subblock_mvs,
789 subblock_mses, q_factor, filter_strength,
790 tf_wgt_calc_lvl, pred, accum, count);
791 }
792 #endif // CONFIG_AV1_HIGHBITDEPTH
793 /*!\brief Normalizes the accumulated filtering result to produce the filtered
794 * frame
795 *
796 * \ingroup src_frame_proc
797 * \param[in] mbd Pointer to the block for filtering, which is
798 * ONLY used to get subsampling information for
799 * all the planes
800 * \param[in] block_size Size of the block
801 * \param[in] mb_row Row index of the block in the frame
802 * \param[in] mb_col Column index of the block in the frame
803 * \param[in] num_planes Number of planes in the frame
804 * \param[in] accum Pointer to the pre-computed accumulator
805 * \param[in] count Pointer to the pre-computed count
806 * \param[out] result_buffer Pointer to result buffer
807 *
808 * \remark Nothing returned, but the content to which `result_buffer` pointer
809 * will be modified
810 */
tf_normalize_filtered_frame(const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int mb_row,const int mb_col,const int num_planes,const uint32_t * accum,const uint16_t * count,YV12_BUFFER_CONFIG * result_buffer)811 static void tf_normalize_filtered_frame(
812 const MACROBLOCKD *mbd, const BLOCK_SIZE block_size, const int mb_row,
813 const int mb_col, const int num_planes, const uint32_t *accum,
814 const uint16_t *count, YV12_BUFFER_CONFIG *result_buffer) {
815 // Block information.
816 const int mb_height = block_size_high[block_size];
817 const int mb_width = block_size_wide[block_size];
818 const int is_high_bitdepth = is_frame_high_bitdepth(result_buffer);
819
820 int plane_offset = 0;
821 for (int plane = 0; plane < num_planes; ++plane) {
822 const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
823 const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
824 const int frame_stride = result_buffer->strides[plane == 0 ? 0 : 1];
825 const int frame_offset = mb_row * plane_h * frame_stride + mb_col * plane_w;
826 uint8_t *const buf = result_buffer->buffers[plane];
827 uint16_t *const buf16 = CONVERT_TO_SHORTPTR(buf);
828
829 int plane_idx = 0; // Pixel index on current plane (block-base).
830 int frame_idx = frame_offset; // Pixel index on the entire frame.
831 for (int i = 0; i < plane_h; ++i) {
832 for (int j = 0; j < plane_w; ++j) {
833 const int idx = plane_idx + plane_offset;
834 const uint16_t rounding = count[idx] >> 1;
835 if (is_high_bitdepth) {
836 buf16[frame_idx] =
837 (uint16_t)OD_DIVU(accum[idx] + rounding, count[idx]);
838 } else {
839 buf[frame_idx] = (uint8_t)OD_DIVU(accum[idx] + rounding, count[idx]);
840 }
841 ++plane_idx;
842 ++frame_idx;
843 }
844 frame_idx += (frame_stride - plane_w);
845 }
846 plane_offset += plane_h * plane_w;
847 }
848 }
849
av1_get_q(const AV1_COMP * cpi)850 int av1_get_q(const AV1_COMP *cpi) {
851 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
852 const FRAME_TYPE frame_type = gf_group->frame_type[cpi->gf_frame_index];
853 const int q =
854 (int)av1_convert_qindex_to_q(cpi->ppi->p_rc.avg_frame_qindex[frame_type],
855 cpi->common.seq_params->bit_depth);
856 return q;
857 }
858
// Applies temporal filtering to one row of blocks of the frame to filter.
// For every block in row `mb_row` this
//   1. clears the accumulators,
//   2. visits every available frame in cpi->tf_ctx: the frame to filter is
//      accumulated directly, every other frame is motion searched,
//      predicted and accumulated with per-pixel weights,
//   3. normalizes the accumulators into the output frame, and
//   4. optionally accumulates source-vs-filtered statistics in
//      td->tf_data.diff.
void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
  // Frame-level filtering context.
  TemporalFilterCtx *const ctx = &cpi->tf_ctx;
  YV12_BUFFER_CONFIG **const frames = ctx->frames;
  const int num_frames = ctx->num_frames;
  const int filter_frame_idx = ctx->filter_frame_idx;
  const int compute_frame_diff = ctx->compute_frame_diff;
  const int num_pels = ctx->num_pels;
  const int q_factor = ctx->q_factor;
  const double *const noise_levels = ctx->noise_levels;
  const struct scale_factors *const scale = &ctx->sf;
  const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];

  // Per-thread working state.
  MACROBLOCK *const mb = &td->mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  TemporalFilterData *const tf_data = &td->tf_data;
  FRAME_DIFF *const diff = &td->tf_data.diff;
  uint32_t *const accum = tf_data->accum;
  uint16_t *const count = tf_data->count;
  uint8_t *const pred = tf_data->pred;

  // Block geometry.
  const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mi_h = mi_size_high_log2[block_size];
  const int mi_w = mi_size_wide_log2[block_size];
  const int num_planes = av1_num_planes(&cpi->common);

  // Encoder settings.
  const int weight_calc_level_in_tf = cpi->sf.hl_sf.weight_calc_level_in_tf;
  // Factor to control the filtering strength.
  const int filter_strength = cpi->oxcf.algo_cfg.arnr_strength;

  const int is_hbd = is_frame_high_bitdepth(frame_to_filter);
  // TODO(any): avx2/sse2 version should be changed to align with C
  // function before using. In particular, current avx2/sse2 function
  // only supports 32x32 block size and 5x5 filtering window.
  const int use_rtcd_filter =
      (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5);

  av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                        (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
                        cpi->oxcf.border_in_pixels);

  for (int mb_col = 0; mb_col < ctx->mb_cols; mb_col++) {
    av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);
    memset(accum, 0, num_pels * sizeof(accum[0]));
    memset(count, 0, num_pels * sizeof(count[0]));

    // Sub-block (16x16) motion search is only worthwhile when the source
    // variance differs noticeably between the 4x4 sub-blocks; the threshold
    // on the spread of the log variances was derived empirically.
    bool allow_me_for_sub_blks = true;
    if (cpi->sf.hl_sf.allow_sub_blk_me_in_tf) {
      // Start with the largest possible minimum and a zero maximum.
      double blk_4x4_var_min = DBL_MAX;
      double blk_4x4_var_max = 0;
      get_log_var_4x4sub_blk(cpi, frame_to_filter, mb_row, mb_col,
                             TF_BLOCK_SIZE, &blk_4x4_var_min, &blk_4x4_var_max,
                             is_frame_high_bitdepth(frame_to_filter));
      // TODO(sanampudi.venkatarao@ittiam.com): Experiment and adjust the
      // threshold for high bit depth.
      if ((blk_4x4_var_max - blk_4x4_var_min) <= 4.0) {
        allow_me_for_sub_blks = false;
      }
    }

    // Reference motion vector passed down along frames.
    MV ref_mv = kZeroMv;

    // Perform temporal filtering frame by frame.
    for (int frame = 0; frame < num_frames; frame++) {
      if (frames[frame] == NULL) continue;

      MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
      int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };

      if (frame == filter_frame_idx) {
        // Frame to be filtered: no motion search is needed. Flip the sign of
        // ref_mv for the frames that follow, then accumulate the frame
        // itself.
        ref_mv.row *= -1;
        ref_mv.col *= -1;
        tf_apply_temporal_filter_self(frames[frame], mbd, block_size, mb_row,
                                      mb_col, num_planes, accum, count);
        continue;
      }

      // Other reference frames: motion search, prediction, weighted
      // averaging.
      tf_motion_search(cpi, mb, frame_to_filter, frames[frame], block_size,
                       mb_row, mb_col, &ref_mv, allow_me_for_sub_blks,
                       subblock_mvs, subblock_mses);
      tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
                         num_planes, scale, subblock_mvs, pred);

      if (!use_rtcd_filter) {
        // Configuration not covered by the dispatched kernels.
        av1_apply_temporal_filter_c(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
      } else if (!is_hbd) {
        // 8-bit input.
        av1_apply_temporal_filter(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
      } else {
        // High bit-depth input.
#if CONFIG_AV1_HIGHBITDEPTH
        av1_highbd_apply_temporal_filter(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
#else
        av1_apply_temporal_filter_c(
            frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
            noise_levels, subblock_mvs, subblock_mses, q_factor,
            filter_strength, weight_calc_level_in_tf, pred, accum, count);
#endif  // CONFIG_AV1_HIGHBITDEPTH
      }
    }

    tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
                                accum, count, ctx->output_frame);

    if (!compute_frame_diff) continue;

    // Accumulate the luma SSE between the source block and the filtered
    // block, together with its square.
    {
      const int y_height = mb_height >> mbd->plane[0].subsampling_y;
      const int y_width = mb_width >> mbd->plane[0].subsampling_x;
      const int source_y_stride = frame_to_filter->y_stride;
      const int filter_y_stride = ctx->output_frame->y_stride;
      const int source_offset =
          mb_row * y_height * source_y_stride + mb_col * y_width;
      const int filter_offset =
          mb_row * y_height * filter_y_stride + mb_col * y_width;
      unsigned int sse = 0;
      cpi->ppi->fn_ptr[block_size].vf(
          frame_to_filter->y_buffer + source_offset, source_y_stride,
          ctx->output_frame->y_buffer + filter_offset, filter_y_stride, &sse);
      diff->sum += sse;
      diff->sse += sse * (int64_t)sse;
    }
  }
}
1005
1006 /*!\brief Does temporal filter for a given frame.
1007 *
1008 * \ingroup src_frame_proc
1009 * \param[in] cpi Top level encoder instance structure
1010 *
1011 * \remark Nothing will be returned, but the contents of td->diff will be
1012 modified.
1013 */
tf_do_filtering(AV1_COMP * cpi)1014 static void tf_do_filtering(AV1_COMP *cpi) {
1015 // Basic information.
1016 ThreadData *td = &cpi->td;
1017 TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1018 const struct scale_factors *scale = &tf_ctx->sf;
1019 const int num_planes = av1_num_planes(&cpi->common);
1020 assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
1021
1022 MACROBLOCKD *mbd = &td->mb.e_mbd;
1023 uint8_t *input_buffer[MAX_MB_PLANE];
1024 MB_MODE_INFO **input_mb_mode_info;
1025 tf_save_state(mbd, &input_mb_mode_info, input_buffer, num_planes);
1026 tf_setup_macroblockd(mbd, &td->tf_data, scale);
1027
1028 // Perform temporal filtering for each row.
1029 for (int mb_row = 0; mb_row < tf_ctx->mb_rows; mb_row++)
1030 av1_tf_do_filtering_row(cpi, td, mb_row);
1031
1032 tf_restore_state(mbd, input_mb_mode_info, input_buffer, num_planes);
1033 }
1034
1035 /*!\brief Setups the frame buffer for temporal filtering. This fuction
1036 * determines how many frames will be used for temporal filtering and then
1037 * groups them into a buffer. This function will also estimate the noise level
1038 * of the to-filter frame.
1039 *
1040 * \ingroup src_frame_proc
1041 * \param[in] cpi Top level encoder instance structure
1042 * \param[in] filter_frame_lookahead_idx The index of the to-filter frame
1043 * in the lookahead buffer cpi->lookahead
1044 * \param[in] gf_frame_index GOP index
1045 *
1046 * \remark Nothing will be returned. But the fields `frames`, `num_frames`,
1047 * `filter_frame_idx` and `noise_levels` will be updated in cpi->tf_ctx.
1048 */
tf_setup_filtering_buffer(AV1_COMP * cpi,int filter_frame_lookahead_idx,int gf_frame_index)1049 static void tf_setup_filtering_buffer(AV1_COMP *cpi,
1050 int filter_frame_lookahead_idx,
1051 int gf_frame_index) {
1052 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1053 const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_frame_index];
1054 const FRAME_TYPE frame_type = gf_group->frame_type[gf_frame_index];
1055 const int is_forward_keyframe =
1056 av1_gop_check_forward_keyframe(gf_group, gf_frame_index);
1057
1058 TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1059 YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
1060 // Number of frames used for filtering. Set `arnr_max_frames` as 1 to disable
1061 // temporal filtering.
1062 int num_frames = AOMMAX(cpi->oxcf.algo_cfg.arnr_max_frames, 1);
1063 int num_before = 0; // Number of filtering frames before the to-filter frame.
1064 int num_after = 0; // Number of filtering frames after the to-filer frame.
1065 const int lookahead_depth =
1066 av1_lookahead_depth(cpi->ppi->lookahead, cpi->compressor_stage);
1067
1068 // Temporal filtering should not go beyond key frames
1069 const int key_to_curframe =
1070 AOMMAX(cpi->rc.frames_since_key + filter_frame_lookahead_idx, 0);
1071 const int curframe_to_key =
1072 AOMMAX(cpi->rc.frames_to_key - filter_frame_lookahead_idx - 1, 0);
1073
1074 // Number of buffered frames before the to-filter frame.
1075 int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
1076
1077 // Number of buffered frames after the to-filter frame.
1078 int max_after =
1079 AOMMIN(lookahead_depth - filter_frame_lookahead_idx - 1, curframe_to_key);
1080
1081 // Estimate noises for each plane.
1082 const struct lookahead_entry *to_filter_buf = av1_lookahead_peek(
1083 cpi->ppi->lookahead, filter_frame_lookahead_idx, cpi->compressor_stage);
1084 assert(to_filter_buf != NULL);
1085 const YV12_BUFFER_CONFIG *to_filter_frame = &to_filter_buf->img;
1086 const int num_planes = av1_num_planes(&cpi->common);
1087 double *noise_levels = tf_ctx->noise_levels;
1088 av1_estimate_noise_level(to_filter_frame, noise_levels, AOM_PLANE_Y,
1089 num_planes - 1, cpi->common.seq_params->bit_depth,
1090 NOISE_ESTIMATION_EDGE_THRESHOLD);
1091 // Get quantization factor.
1092 const int q = av1_get_q(cpi);
1093 // Get correlation estimates from first-pass;
1094 const FIRSTPASS_STATS *stats =
1095 cpi->twopass_frame.stats_in - (cpi->rc.frames_since_key == 0);
1096 double accu_coeff0 = 1.0, accu_coeff1 = 1.0;
1097 for (int i = 1; i <= max_after; i++) {
1098 if (stats + filter_frame_lookahead_idx + i >=
1099 cpi->ppi->twopass.stats_buf_ctx->stats_in_end) {
1100 max_after = i - 1;
1101 break;
1102 }
1103 accu_coeff1 *=
1104 AOMMAX(stats[filter_frame_lookahead_idx + i].cor_coeff, 0.001);
1105 }
1106 if (max_after >= 1) {
1107 accu_coeff1 = pow(accu_coeff1, 1.0 / (double)max_after);
1108 }
1109 for (int i = 1; i <= max_before; i++) {
1110 if (stats + filter_frame_lookahead_idx - i + 1 <=
1111 cpi->ppi->twopass.stats_buf_ctx->stats_in_start) {
1112 max_before = i - 1;
1113 break;
1114 }
1115 accu_coeff0 *=
1116 AOMMAX(stats[filter_frame_lookahead_idx - i + 1].cor_coeff, 0.001);
1117 }
1118 if (max_before >= 1) {
1119 accu_coeff0 = pow(accu_coeff0, 1.0 / (double)max_before);
1120 }
1121
1122 // Adjust number of filtering frames based on quantization factor. When the
1123 // quantization factor is small enough (lossless compression), we will not
1124 // change the number of frames for key frame filtering, which is to avoid
1125 // visual quality drop.
1126 int adjust_num = 6;
1127 const int adjust_num_frames_for_arf_filtering =
1128 cpi->sf.hl_sf.adjust_num_frames_for_arf_filtering;
1129 if (num_frames == 1) { // `arnr_max_frames = 1` is used to disable filtering.
1130 adjust_num = 0;
1131 } else if ((update_type == KF_UPDATE) && q <= 10) {
1132 adjust_num = 0;
1133 } else if (adjust_num_frames_for_arf_filtering > 0 &&
1134 update_type != KF_UPDATE && (cpi->rc.frames_since_key > 0)) {
1135 // Since screen content detection happens after temporal filtering,
1136 // 'frames_since_key' check is added to ensure the sf is disabled for the
1137 // first alt-ref frame.
1138 // Adjust number of frames to be considered for filtering based on noise
1139 // level of the current frame. For low-noise frame, use more frames to
1140 // filter such that the filtered frame can provide better predictions for
1141 // subsequent frames and vice versa.
1142 const uint8_t av1_adjust_num_using_noise_lvl[2][3] = { { 6, 4, 2 },
1143 { 4, 2, 0 } };
1144 const uint8_t *adjust_num_frames =
1145 av1_adjust_num_using_noise_lvl[adjust_num_frames_for_arf_filtering - 1];
1146
1147 if (noise_levels[AOM_PLANE_Y] < 0.5)
1148 adjust_num = adjust_num_frames[0];
1149 else if (noise_levels[AOM_PLANE_Y] < 1.0)
1150 adjust_num = adjust_num_frames[1];
1151 else
1152 adjust_num = adjust_num_frames[2];
1153 }
1154 num_frames = AOMMIN(num_frames + adjust_num, lookahead_depth);
1155
1156 if (frame_type == KEY_FRAME) {
1157 num_before = AOMMIN(is_forward_keyframe ? num_frames / 2 : 0, max_before);
1158 num_after = AOMMIN(num_frames - 1, max_after);
1159 } else {
1160 int gfu_boost = av1_calc_arf_boost(&cpi->ppi->twopass, &cpi->twopass_frame,
1161 &cpi->ppi->p_rc, &cpi->frame_info,
1162 filter_frame_lookahead_idx, max_before,
1163 max_after, NULL, NULL, 0);
1164
1165 num_frames = AOMMIN(num_frames, gfu_boost / 150);
1166 num_frames += !(num_frames & 1); // Make the number odd.
1167
1168 // Only use 2 neighbours for the second ARF.
1169 if (update_type == INTNL_ARF_UPDATE) num_frames = AOMMIN(num_frames, 3);
1170 if (AOMMIN(max_after, max_before) >= num_frames / 2) {
1171 // just use half half
1172 num_before = num_frames / 2;
1173 num_after = num_frames / 2;
1174 } else {
1175 if (max_after < num_frames / 2) {
1176 num_after = max_after;
1177 num_before = AOMMIN(num_frames - 1 - num_after, max_before);
1178 } else {
1179 num_before = max_before;
1180 num_after = AOMMIN(num_frames - 1 - num_before, max_after);
1181 }
1182 // Adjust insymmetry based on frame-level correlation
1183 if (max_after > 0 && max_before > 0) {
1184 if (num_after < num_before) {
1185 const int insym = (int)(0.4 / AOMMAX(1 - accu_coeff1, 0.01));
1186 num_before = AOMMIN(num_before, num_after + insym);
1187 } else {
1188 const int insym = (int)(0.4 / AOMMAX(1 - accu_coeff0, 0.01));
1189 num_after = AOMMIN(num_after, num_before + insym);
1190 }
1191 }
1192 }
1193 }
1194 num_frames = num_before + 1 + num_after;
1195
1196 // Setup the frame buffer.
1197 for (int frame = 0; frame < num_frames; ++frame) {
1198 const int lookahead_idx = frame - num_before + filter_frame_lookahead_idx;
1199 struct lookahead_entry *buf = av1_lookahead_peek(
1200 cpi->ppi->lookahead, lookahead_idx, cpi->compressor_stage);
1201 assert(buf != NULL);
1202 frames[frame] = &buf->img;
1203 }
1204 tf_ctx->num_frames = num_frames;
1205 tf_ctx->filter_frame_idx = num_before;
1206 assert(frames[tf_ctx->filter_frame_idx] == to_filter_frame);
1207
1208 av1_setup_src_planes(&cpi->td.mb, &to_filter_buf->img, 0, 0, num_planes,
1209 cpi->common.seq_params->sb_size);
1210 av1_setup_block_planes(&cpi->td.mb.e_mbd,
1211 cpi->common.seq_params->subsampling_x,
1212 cpi->common.seq_params->subsampling_y, num_planes);
1213 }
1214
1215 /*!\cond */
1216
av1_estimate_noise_from_single_plane_c(const uint8_t * src,int height,int width,int stride,int edge_thresh)1217 double av1_estimate_noise_from_single_plane_c(const uint8_t *src, int height,
1218 int width, int stride,
1219 int edge_thresh) {
1220 int64_t accum = 0;
1221 int count = 0;
1222
1223 for (int i = 1; i < height - 1; ++i) {
1224 for (int j = 1; j < width - 1; ++j) {
1225 // Setup a small 3x3 matrix.
1226 const int center_idx = i * stride + j;
1227 int mat[3][3];
1228 for (int ii = -1; ii <= 1; ++ii) {
1229 for (int jj = -1; jj <= 1; ++jj) {
1230 const int idx = center_idx + ii * stride + jj;
1231 mat[ii + 1][jj + 1] = src[idx];
1232 }
1233 }
1234 // Compute sobel gradients.
1235 const int Gx = (mat[0][0] - mat[0][2]) + (mat[2][0] - mat[2][2]) +
1236 2 * (mat[1][0] - mat[1][2]);
1237 const int Gy = (mat[0][0] - mat[2][0]) + (mat[0][2] - mat[2][2]) +
1238 2 * (mat[0][1] - mat[2][1]);
1239 const int Ga = ROUND_POWER_OF_TWO(abs(Gx) + abs(Gy), 0);
1240 // Accumulate Laplacian.
1241 if (Ga < edge_thresh) { // Only count smooth pixels.
1242 const int v = 4 * mat[1][1] -
1243 2 * (mat[0][1] + mat[2][1] + mat[1][0] + mat[1][2]) +
1244 (mat[0][0] + mat[0][2] + mat[2][0] + mat[2][2]);
1245 accum += ROUND_POWER_OF_TWO(abs(v), 0);
1246 ++count;
1247 }
1248 }
1249 }
1250
1251 // Return -1.0 (unreliable estimation) if there are too few smooth pixels.
1252 return (count < 16) ? -1.0 : (double)accum / (6 * count) * SQRT_PI_BY_2;
1253 }
1254
1255 #if CONFIG_AV1_HIGHBITDEPTH
av1_highbd_estimate_noise_from_single_plane_c(const uint16_t * src16,int height,int width,const int stride,int bit_depth,int edge_thresh)1256 double av1_highbd_estimate_noise_from_single_plane_c(const uint16_t *src16,
1257 int height, int width,
1258 const int stride,
1259 int bit_depth,
1260 int edge_thresh) {
1261 int64_t accum = 0;
1262 int count = 0;
1263 for (int i = 1; i < height - 1; ++i) {
1264 for (int j = 1; j < width - 1; ++j) {
1265 // Setup a small 3x3 matrix.
1266 const int center_idx = i * stride + j;
1267 int mat[3][3];
1268 for (int ii = -1; ii <= 1; ++ii) {
1269 for (int jj = -1; jj <= 1; ++jj) {
1270 const int idx = center_idx + ii * stride + jj;
1271 mat[ii + 1][jj + 1] = src16[idx];
1272 }
1273 }
1274 // Compute sobel gradients.
1275 const int Gx = (mat[0][0] - mat[0][2]) + (mat[2][0] - mat[2][2]) +
1276 2 * (mat[1][0] - mat[1][2]);
1277 const int Gy = (mat[0][0] - mat[2][0]) + (mat[0][2] - mat[2][2]) +
1278 2 * (mat[0][1] - mat[2][1]);
1279 const int Ga = ROUND_POWER_OF_TWO(abs(Gx) + abs(Gy), bit_depth - 8);
1280 // Accumulate Laplacian.
1281 if (Ga < edge_thresh) { // Only count smooth pixels.
1282 const int v = 4 * mat[1][1] -
1283 2 * (mat[0][1] + mat[2][1] + mat[1][0] + mat[1][2]) +
1284 (mat[0][0] + mat[0][2] + mat[2][0] + mat[2][2]);
1285 accum += ROUND_POWER_OF_TWO(abs(v), bit_depth - 8);
1286 ++count;
1287 }
1288 }
1289 }
1290
1291 // Return -1.0 (unreliable estimation) if there are too few smooth pixels.
1292 return (count < 16) ? -1.0 : (double)accum / (6 * count) * SQRT_PI_BY_2;
1293 }
1294 #endif
1295
av1_estimate_noise_level(const YV12_BUFFER_CONFIG * frame,double * noise_level,int plane_from,int plane_to,int bit_depth,int edge_thresh)1296 void av1_estimate_noise_level(const YV12_BUFFER_CONFIG *frame,
1297 double *noise_level, int plane_from, int plane_to,
1298 int bit_depth, int edge_thresh) {
1299 for (int plane = plane_from; plane <= plane_to; plane++) {
1300 const bool is_uv_plane = (plane != AOM_PLANE_Y);
1301 const int height = frame->crop_heights[is_uv_plane];
1302 const int width = frame->crop_widths[is_uv_plane];
1303 const int stride = frame->strides[is_uv_plane];
1304 const uint8_t *src = frame->buffers[plane];
1305
1306 #if CONFIG_AV1_HIGHBITDEPTH
1307 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1308 const int is_high_bitdepth = is_frame_high_bitdepth(frame);
1309 if (is_high_bitdepth) {
1310 noise_level[plane] = av1_highbd_estimate_noise_from_single_plane(
1311 src16, height, width, stride, bit_depth, edge_thresh);
1312 } else {
1313 noise_level[plane] = av1_estimate_noise_from_single_plane(
1314 src, height, width, stride, edge_thresh);
1315 }
1316 #else
1317 (void)bit_depth;
1318 noise_level[plane] = av1_estimate_noise_from_single_plane(
1319 src, height, width, stride, edge_thresh);
1320 #endif
1321 }
1322 }
1323
1324 // Initializes the members of TemporalFilterCtx
1325 // Inputs:
1326 // cpi: Top level encoder instance structure
1327 // check_show_existing: If 1, check whether the filtered frame is similar
1328 // to the original frame.
1329 // filter_frame_lookahead_idx: The index of the frame to be filtered in the
1330 // lookahead buffer cpi->lookahead.
1331 // Returns:
1332 // Nothing will be returned. But the contents of cpi->tf_ctx will be modified.
init_tf_ctx(AV1_COMP * cpi,int filter_frame_lookahead_idx,int gf_frame_index,int compute_frame_diff,YV12_BUFFER_CONFIG * output_frame)1333 static void init_tf_ctx(AV1_COMP *cpi, int filter_frame_lookahead_idx,
1334 int gf_frame_index, int compute_frame_diff,
1335 YV12_BUFFER_CONFIG *output_frame) {
1336 TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
1337 // Setup frame buffer for filtering.
1338 YV12_BUFFER_CONFIG **frames = tf_ctx->frames;
1339 tf_ctx->num_frames = 0;
1340 tf_ctx->filter_frame_idx = -1;
1341 tf_ctx->output_frame = output_frame;
1342 tf_ctx->compute_frame_diff = compute_frame_diff;
1343 tf_setup_filtering_buffer(cpi, filter_frame_lookahead_idx, gf_frame_index);
1344 assert(tf_ctx->num_frames > 0);
1345 assert(tf_ctx->filter_frame_idx < tf_ctx->num_frames);
1346
1347 // Setup scaling factors. Scaling on each of the arnr frames is not
1348 // supported.
1349 // ARF is produced at the native frame size and resized when coded.
1350 struct scale_factors *sf = &tf_ctx->sf;
1351 av1_setup_scale_factors_for_frame(
1352 sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
1353 frames[0]->y_crop_width, frames[0]->y_crop_height);
1354
1355 // Initialize temporal filter parameters.
1356 MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
1357 const int filter_frame_idx = tf_ctx->filter_frame_idx;
1358 const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
1359 const BLOCK_SIZE block_size = TF_BLOCK_SIZE;
1360 const int frame_height = frame_to_filter->y_crop_height;
1361 const int frame_width = frame_to_filter->y_crop_width;
1362 const int mb_width = block_size_wide[block_size];
1363 const int mb_height = block_size_high[block_size];
1364 const int mb_rows = get_num_blocks(frame_height, mb_height);
1365 const int mb_cols = get_num_blocks(frame_width, mb_width);
1366 const int mb_pels = mb_width * mb_height;
1367 const int is_highbitdepth = is_frame_high_bitdepth(frame_to_filter);
1368 const int num_planes = av1_num_planes(&cpi->common);
1369 int num_pels = 0;
1370 for (int i = 0; i < num_planes; i++) {
1371 const int subsampling_x = mbd->plane[i].subsampling_x;
1372 const int subsampling_y = mbd->plane[i].subsampling_y;
1373 num_pels += mb_pels >> (subsampling_x + subsampling_y);
1374 }
1375 tf_ctx->num_pels = num_pels;
1376 tf_ctx->mb_rows = mb_rows;
1377 tf_ctx->mb_cols = mb_cols;
1378 tf_ctx->is_highbitdepth = is_highbitdepth;
1379 tf_ctx->q_factor = av1_get_q(cpi);
1380 }
1381
// Decides from the accumulated block differences whether the filtered frame
// is close enough to the source frame.
// Inputs:
//   frame: Frame whose cropped luma size determines the number of filtering
//     blocks.
//   frame_diff: Accumulated sum (`sum`) and sum of squares (`sse`) of the
//     per-block differences.
//   q_index: Quantizer index used to derive the AC quantizer step.
//   bit_depth: Bit depth used to derive the AC quantizer step.
// Returns:
//   1 if the mean block difference is below 0.7 * ac_q_step^2 and the
//   standard deviation of the block differences is below 1.2 times the mean;
//   0 otherwise.
int av1_check_show_filtered_frame(const YV12_BUFFER_CONFIG *frame,
                                  const FRAME_DIFF *frame_diff, int q_index,
                                  aom_bit_depth_t bit_depth) {
  const int frame_height = frame->y_crop_height;
  const int frame_width = frame->y_crop_width;
  const int block_height = block_size_high[TF_BLOCK_SIZE];
  const int block_width = block_size_wide[TF_BLOCK_SIZE];
  const int mb_rows = get_num_blocks(frame_height, block_height);
  const int mb_cols = get_num_blocks(frame_width, block_width);
  const int num_mbs = AOMMAX(1, mb_rows * mb_cols);
  const float mean = (float)frame_diff->sum / num_mbs;
  // E[x^2] - E[x]^2 is evaluated in single precision, so rounding can make it
  // slightly negative when the block differences are (almost) identical.
  // sqrt() of a negative value is NaN, which would fail the comparison below
  // precisely when the spread is smallest, so clamp the variance at zero.
  const double variance = (float)frame_diff->sse / num_mbs - mean * mean;
  const float std = (variance > 0.0) ? (float)sqrt(variance) : 0.0f;

  const int ac_q_step = av1_ac_quant_QTX(q_index, 0, bit_depth);
  const float threshold = 0.7f * ac_q_step * ac_q_step;

  if (mean < threshold && std < mean * 1.2) {
    return 1;
  }
  return 0;
}
1403
// Applies temporal filtering to the frame at `filter_frame_lookahead_idx` in
// the lookahead buffer and writes the result to `output_frame`.
// Inputs:
//   cpi: Top level encoder instance structure.
//   filter_frame_lookahead_idx: Index of the frame to filter in the lookahead
//     buffer.
//   gf_frame_index: GOP index of the frame to filter.
//   frame_diff: If not NULL, receives the difference statistics gathered
//     while filtering; if NULL, no statistics are gathered.
//   output_frame: Frame buffer that receives the filtered frame.
void av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
                         int gf_frame_index, FRAME_DIFF *frame_diff,
                         YV12_BUFFER_CONFIG *output_frame) {
  // TODO(anyone): Currently, we enforce the filtering strength on internal
  // ARFs except the second ARF to be zero. We should investigate in which case
  // it is more beneficial to use non-zero strength filtering.
  // Only parallel level 0 frames go through temporal filtering.
  assert(cpi->ppi->gf_group.frame_parallel_level[gf_frame_index] == 0);

  // Basic information of the current frame.
  TemporalFilterCtx *const ctx = &cpi->tf_ctx;
  TemporalFilterData *const data = &cpi->td.tf_data;
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int compute_frame_diff = frame_diff != NULL;

  // Initialize temporal filter context structure.
  init_tf_ctx(cpi, filter_frame_lookahead_idx, gf_frame_index,
              compute_frame_diff, output_frame);

  // Allocate and reset temporal filter buffers.
  const int is_highbitdepth = ctx->is_highbitdepth;
  if (!tf_alloc_and_reset_data(data, ctx->num_pels, is_highbitdepth)) {
    aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
                       "Error allocating temporal filter data");
  }

  // Filter on the calling thread unless more than one worker is available.
  if (mt_info->num_workers <= 1) {
    tf_do_filtering(cpi);
  } else {
    av1_tf_do_filtering_mt(cpi);
  }

  // Hand the gathered statistics back to the caller when requested.
  if (compute_frame_diff) {
    *frame_diff = data->diff;
  }

  // Deallocate temporal filter buffers.
  tf_dealloc_data(data, is_highbitdepth);
}
1441
av1_is_temporal_filter_on(const AV1EncoderConfig * oxcf)1442 int av1_is_temporal_filter_on(const AV1EncoderConfig *oxcf) {
1443 return oxcf->algo_cfg.arnr_max_frames > 0 && oxcf->gf_cfg.lag_in_frames > 1;
1444 }
1445
// Records whether temporal filtering is enabled and, if it is, (re)allocates
// the TF_INFO_BUF_COUNT frame buffers in tf_info->tf_buf at the configured
// frame dimensions.
//
// Returns true on success, or when filtering is disabled and nothing needs to
// be allocated. Returns false as soon as one buffer allocation fails; buffers
// allocated before the failure are not released here.
bool av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, const AV1_COMP *cpi) {
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  tf_info->is_temporal_filter_on = av1_is_temporal_filter_on(oxcf);
  if (!tf_info->is_temporal_filter_on) {
    return true;
  }

  const AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq = cm->seq_params;
  for (int buf_idx = 0; buf_idx < TF_INFO_BUF_COUNT; ++buf_idx) {
    YV12_BUFFER_CONFIG *const buf = &tf_info->tf_buf[buf_idx];
    // A non-zero return from aom_realloc_frame_buffer() is treated as failure.
    if (aom_realloc_frame_buffer(buf, oxcf->frm_dim_cfg.width,
                                 oxcf->frm_dim_cfg.height, seq->subsampling_x,
                                 seq->subsampling_y, seq->use_highbitdepth,
                                 oxcf->border_in_pixels,
                                 cm->features.byte_alignment, NULL, NULL, NULL,
                                 cpi->alloc_pyramid, 0) != 0) {
      return false;
    }
  }
  return true;
}
1465
// Releases every frame buffer held by `tf_info`: the TF_INFO_BUF_COUNT entries
// of tf_buf followed by tf_buf_second_arf. Does nothing when temporal
// filtering is flagged as off in `tf_info`.
void av1_tf_info_free(TEMPORAL_FILTER_INFO *tf_info) {
  if (tf_info->is_temporal_filter_on != 0) {
    int buf_idx = 0;
    while (buf_idx < TF_INFO_BUF_COUNT) {
      aom_free_frame_buffer(&tf_info->tf_buf[buf_idx]);
      ++buf_idx;
    }
    aom_free_frame_buffer(&tf_info->tf_buf_second_arf);
  }
}
1473
// Zeroes the per-buffer bookkeeping of `tf_info` (validity flags, display
// index offsets and GF indices). The frame buffers themselves are not touched.
void av1_tf_info_reset(TEMPORAL_FILTER_INFO *tf_info) {
  // With the validity flags cleared, av1_tf_info_filtering() will regenerate
  // each buffer the next time it is needed.
  av1_zero(tf_info->tf_buf_valid);
  av1_zero(tf_info->tf_buf_display_index_offset);
  av1_zero(tf_info->tf_buf_gf_index);
}
1479
// Generates the temporally filtered frames needed by `gf_group` and caches
// them in `tf_info`.
//
// Every KF_UPDATE / ARF_UPDATE entry of the group is considered. The result
// is stored in slot 1 when the entry's frame type is INTER_FRAME and in slot 0
// otherwise. This function is designed to be called multiple times after
// av1_tf_info_reset(): a slot is only refiltered when it is not valid yet or
// when it was produced for a different lookahead index.
void av1_tf_info_filtering(TEMPORAL_FILTER_INFO *tf_info, AV1_COMP *cpi,
                           const GF_GROUP *gf_group) {
  if (!tf_info->is_temporal_filter_on) return;

  const AV1_COMMON *const cm = &cpi->common;
  for (int idx = 0; idx < gf_group->size; ++idx) {
    const int update_type = gf_group->update_type[idx];
    if (update_type != KF_UPDATE && update_type != ARF_UPDATE) continue;

    const int slot = (gf_group->frame_type[idx] == INTER_FRAME) ? 1 : 0;
    const int lookahead_idx =
        gf_group->cur_frame_idx[idx] + gf_group->arf_src_offset[idx];

    // Skip the work when this slot already holds the frame for this
    // lookahead position.
    const int already_filtered =
        tf_info->tf_buf_valid[slot] != 0 &&
        tf_info->tf_buf_display_index_offset[slot] == lookahead_idx;
    if (already_filtered) continue;

    YV12_BUFFER_CONFIG *const filtered = &tf_info->tf_buf[slot];
    av1_temporal_filter(cpi, lookahead_idx, idx, &tf_info->frame_diff[slot],
                        filtered);
    aom_extend_frame_borders(filtered, av1_num_planes(cm));

    // Record what the slot now contains.
    tf_info->tf_buf_gf_index[slot] = idx;
    tf_info->tf_buf_display_index_offset[slot] = lookahead_idx;
    tf_info->tf_buf_valid[slot] = 1;
  }
}
1506
av1_tf_info_get_filtered_buf(TEMPORAL_FILTER_INFO * tf_info,int gf_index,FRAME_DIFF * frame_diff)1507 YV12_BUFFER_CONFIG *av1_tf_info_get_filtered_buf(TEMPORAL_FILTER_INFO *tf_info,
1508 int gf_index,
1509 FRAME_DIFF *frame_diff) {
1510 if (tf_info->is_temporal_filter_on == 0) return NULL;
1511 YV12_BUFFER_CONFIG *out_buf = NULL;
1512 for (int i = 0; i < TF_INFO_BUF_COUNT; ++i) {
1513 if (tf_info->tf_buf_valid[i] && tf_info->tf_buf_gf_index[i] == gf_index) {
1514 out_buf = &tf_info->tf_buf[i];
1515 *frame_diff = tf_info->frame_diff[i];
1516 }
1517 }
1518 return out_buf;
1519 }
1520 /*!\endcond */
1521