1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <limits.h>
14
15 #include "config/aom_config.h"
16
17 #include "av1/common/alloccommon.h"
18 #include "av1/common/av1_common_int.h"
19 #include "av1/common/odintrin.h"
20 #include "av1/common/quant_common.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/encoder/av1_quantize.h"
23 #include "av1/encoder/encoder.h"
24 #include "av1/encoder/extend.h"
25 #include "av1/encoder/firstpass.h"
26 #include "av1/encoder/mcomp.h"
27 #include "av1/encoder/ratectrl.h"
28 #include "av1/encoder/reconinter_enc.h"
29 #include "av1/encoder/segmentation.h"
30 #include "av1/encoder/temporal_filter.h"
31 #include "aom_dsp/aom_dsp_common.h"
32 #include "aom_mem/aom_mem.h"
33 #include "aom_ports/aom_timer.h"
34 #include "aom_ports/mem.h"
35 #include "aom_ports/system_state.h"
36 #include "aom_scale/aom_scale.h"
37
38 // NOTE: All `tf` in this file means `temporal filtering`.
39
40 // Does motion search for blocks in temporal filtering. This is the first step
41 // for temporal filtering. More specifically, given a frame to be filtered and
42 // another frame as reference, this function searches the reference frame to
43 // find out the most alike block as that from the frame to be filtered. This
44 // found block will be further used for weighted averaging.
45 // NOTE: Besides doing motion search for the entire block, this function will
46 // also do motion search for each 1/4 sub-block to get more precise prediction.
47 // Inputs:
48 // cpi: Pointer to the composed information of input video.
49 // frame_to_filter: Pointer to the frame to be filtered.
50 // ref_frame: Pointer to the reference frame.
51 // block_size: Block size used for motion search.
52 // mb_row: Row index of the block in the entire frame.
53 // mb_col: Column index of the block in the entire frame.
54 // ref_mv: Reference motion vector, which is commonly inherited from the
55 // motion search result of previous frame.
56 // subblock_mvs: Pointer to the result motion vectors for 4 sub-blocks.
57 // subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
58 // Returns:
59 // Search error (MSE) of the entire block.
static int tf_motion_search(AV1_COMP *cpi,
                            const YV12_BUFFER_CONFIG *frame_to_filter,
                            const YV12_BUFFER_CONFIG *ref_frame,
                            const BLOCK_SIZE block_size, const int mb_row,
                            const int mb_col, MV *ref_mv, MV *subblock_mvs,
                            int *subblock_mses) {
  // Frame information
  const int min_frame_size = AOMMIN(cpi->common.width, cpi->common.height);

  // Block information (ONLY Y-plane is used for motion search).
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int y_stride = frame_to_filter->y_stride;
  assert(y_stride == ref_frame->y_stride);
  const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;

  // Save input state. The search below temporarily repoints the encoder's
  // src/pre buffers at this block, so the originals must be restored before
  // returning.
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  const struct buf_2d ori_src_buf = mb->plane[0].src;
  const struct buf_2d ori_pre_buf = mbd->plane[0].pre[0];
  const MV_COST_TYPE ori_mv_cost_type = mb->mv_cost_type;

  // Parameters used for motion search.
  FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
  SUBPEL_MOTION_SEARCH_PARAMS ms_params;

  const search_site_config ss_cfg =
      cpi->mv_search_params.ss_cfg[SS_CFG_LOOKAHEAD];
  const SEARCH_METHODS full_search_method = NSTEP;
  const int step_param = av1_init_search_range(
      AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
  const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
  const int force_integer_mv = cpi->common.features.cur_frame_force_integer_mv;
  // MV cost regularization is chosen by resolution (720p+/480p+/lower).
  const MV_COST_TYPE mv_cost_type =
      min_frame_size >= 720
          ? MV_COST_L1_HDRES
          : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);

  // Starting position for motion search.
  FULLPEL_MV start_mv = get_fullmv_from_mv(ref_mv);
  // Baseline position for motion search (used for rate distortion comparison).
  const MV baseline_mv = kZeroMv;

  // Setup.
  mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset;
  mb->plane[0].src.stride = y_stride;
  mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset;
  mbd->plane[0].pre[0].stride = y_stride;
  // Unused intermediate results for motion search.
  unsigned int sse, error;
  int distortion;
  int cost_list[5];

  // Do motion search.
  // NOTE: In `av1_full_pixel_search()` and `find_fractional_mv_step()`, the
  // searched result will be stored in `mb->best_mv`.
  int_mv best_mv;
  int block_mse = INT_MAX;
  mb->mv_cost_type = mv_cost_type;

  av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
                                     &baseline_mv, &ss_cfg);
  full_ms_params.run_mesh_search = 1;
  full_ms_params.search_method = full_search_method;
  av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                        cond_cost_list(cpi, cost_list), &best_mv.as_fullmv,
                        NULL);

  // Since we are merely refining the result from full pixel search, we don't
  // need regularization for subpel search
  mb->mv_cost_type = MV_COST_NONE;
  if (force_integer_mv == 1) {  // Only do full search on the entire block.
    const int mv_row = best_mv.as_mv.row;
    const int mv_col = best_mv.as_mv.col;
    // Convert the full-pel result to subpel units so it can be stored in the
    // (subpel-unit) MV field below.
    best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
    best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
    const int mv_offset = mv_row * y_stride + mv_col;
    error = cpi->fn_ptr[block_size].vf(
        ref_frame->y_buffer + y_offset + mv_offset, y_stride,
        frame_to_filter->y_buffer + y_offset, y_stride, &sse);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    mb->e_mbd.mi[0]->mv[0] = best_mv;
    // NOTE(review): this path leaves `subblock_mvs`, `subblock_mses` and
    // `*ref_mv` untouched — callers presumably use whole-block results only
    // when integer MV is forced; confirm against call sites.
  } else {  // Do fractional search on the entire block and all sub-blocks.
    av1_make_default_subpel_ms_params(&ms_params, cpi, mb, block_size,
                                      &baseline_mv, cost_list);
    ms_params.forced_stop = EIGHTH_PEL;
    ms_params.var_params.subpel_search_type = subpel_search_type;
    MV subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    error = cpi->mv_search_params.find_fractional_mv_step(
        &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv, &best_mv.as_mv,
        &distortion, &sse, NULL);
    block_mse = DIVIDE_AND_ROUND(error, mb_pels);
    mb->e_mbd.mi[0]->mv[0] = best_mv;
    // Propagate the whole-block result so it can seed the next frame's search
    // and the sub-block searches below.
    *ref_mv = best_mv.as_mv;
    // On 4 sub-blocks.
    const BLOCK_SIZE subblock_size = ss_size_lookup[block_size][1][1];
    const int subblock_height = block_size_high[subblock_size];
    const int subblock_width = block_size_wide[subblock_size];
    const int subblock_pels = subblock_height * subblock_width;
    start_mv = get_fullmv_from_mv(ref_mv);

    int subblock_idx = 0;
    for (int i = 0; i < mb_height; i += subblock_height) {
      for (int j = 0; j < mb_width; j += subblock_width) {
        const int offset = i * y_stride + j;
        mb->plane[0].src.buf = frame_to_filter->y_buffer + y_offset + offset;
        mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
        // Re-enable MV cost regularization for the full-pel stage.
        mb->mv_cost_type = mv_cost_type;

        av1_make_default_fullpel_ms_params(
            &full_ms_params, cpi, mb, subblock_size, &baseline_mv, &ss_cfg);
        full_ms_params.run_mesh_search = 1;
        full_ms_params.search_method = full_search_method;
        av1_full_pixel_search(start_mv, &full_ms_params, step_param,
                              cond_cost_list(cpi, cost_list),
                              &best_mv.as_fullmv, NULL);

        // Since we are merely refining the result from full pixel search, we
        // don't need regularization for subpel search
        mb->mv_cost_type = MV_COST_NONE;
        av1_make_default_subpel_ms_params(&ms_params, cpi, mb, subblock_size,
                                          &baseline_mv, cost_list);
        ms_params.forced_stop = EIGHTH_PEL;
        ms_params.var_params.subpel_search_type = subpel_search_type;
        subpel_start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
        error = cpi->mv_search_params.find_fractional_mv_step(
            &mb->e_mbd, &cpi->common, &ms_params, subpel_start_mv,
            &best_mv.as_mv, &distortion, &sse, NULL);
        subblock_mses[subblock_idx] = DIVIDE_AND_ROUND(error, subblock_pels);
        subblock_mvs[subblock_idx] = best_mv.as_mv;
        ++subblock_idx;
      }
    }
  }

  // Restore input state.
  mb->plane[0].src = ori_src_buf;
  mbd->plane[0].pre[0] = ori_pre_buf;
  mb->mv_cost_type = ori_mv_cost_type;

  return block_mse;
}
204
205 // Helper function to get weight according to thresholds.
// Maps a value onto a coarse weight using two thresholds:
// 2 when value < low, 1 when low <= value < high, 0 otherwise.
static inline int get_weight_by_thresh(const int value, const int low,
                                       const int high) {
  if (value < low) return 2;
  if (value < high) return 1;
  return 0;
}
210
211 // Gets filter weight for blocks in temporal filtering. The weights will be
212 // assigned based on the motion search errors.
213 // NOTE: Besides assigning filter weight for the block, this function will also
214 // determine whether to split the entire block into 4 sub-blocks for further
215 // filtering.
216 // TODO(any): Many magic numbers are used in this function. They may be tuned
217 // to improve the performance.
218 // Inputs:
219 // block_mse: Motion search error (MSE) for the entire block.
220 // subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
221 // is_second_arf: Whether the to-filter frame is the second ARF. This field
222 // will affect the filter weight for the to-filter frame.
223 // subblock_filter_weights: Pointer to the assigned filter weight for each
224 // sub-block. If not using sub-blocks, the first
225 // element will be used for the entire block.
226 // Returns: Whether to use 4 sub-blocks to replace the original block.
tf_get_filter_weight(const int block_mse,const int * subblock_mses,const int is_second_arf,int * subblock_filter_weights)227 static int tf_get_filter_weight(const int block_mse, const int *subblock_mses,
228 const int is_second_arf,
229 int *subblock_filter_weights) {
230 // `block_mse` is initialized as INT_MAX and will be overwritten after the
231 // motion search with reference frame, therefore INT_MAX can ONLY be accessed
232 // by to-filter frame.
233 if (block_mse == INT_MAX) {
234 const int weight = TF_ENABLE_PLANEWISE_STRATEGY
235 ? TF_PLANEWISE_FILTER_WEIGHT_SCALE
236 : is_second_arf ? 64 : 32;
237 subblock_filter_weights[0] = subblock_filter_weights[1] =
238 subblock_filter_weights[2] = subblock_filter_weights[3] = weight;
239 return 0;
240 }
241
242 const int thresh_low = is_second_arf ? 20 : 40;
243 const int thresh_high = is_second_arf ? 40 : 80;
244
245 int min_subblock_mse = INT_MAX;
246 int max_subblock_mse = INT_MIN;
247 int sum_subblock_mse = 0;
248 for (int i = 0; i < 4; ++i) {
249 sum_subblock_mse += subblock_mses[i];
250 min_subblock_mse = AOMMIN(min_subblock_mse, subblock_mses[i]);
251 max_subblock_mse = AOMMAX(max_subblock_mse, subblock_mses[i]);
252 subblock_filter_weights[i] =
253 get_weight_by_thresh(subblock_mses[i], thresh_low, thresh_high);
254 }
255
256 if (((block_mse * 15 < sum_subblock_mse * 4) &&
257 max_subblock_mse - min_subblock_mse < 48) ||
258 ((block_mse * 14 < sum_subblock_mse * 4) &&
259 max_subblock_mse - min_subblock_mse < 24)) { // No split.
260 const int weight = get_weight_by_thresh(block_mse, thresh_low, thresh_high);
261 subblock_filter_weights[0] = subblock_filter_weights[1] =
262 subblock_filter_weights[2] = subblock_filter_weights[3] = weight;
263 return 0;
264 } else { // Do split.
265 return 1;
266 }
267 }
268
269 // Helper function to determine whether a frame is encoded with high bit-depth.
is_frame_high_bitdepth(const YV12_BUFFER_CONFIG * frame)270 static INLINE int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
271 return (frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
272 }
273
274 // Builds predictor for blocks in temporal filtering. This is the second step
275 // for temporal filtering, which is to construct predictions from all reference
276 // frames INCLUDING the frame to be filtered itself. These predictors are built
277 // based on the motion search results (motion vector is set as 0 for the frame
// to be filtered), and will be further used for weighted averaging.
279 // Inputs:
280 // ref_frame: Pointer to the reference frame (or the frame to be filtered).
281 // mbd: Pointer to the block for filtering. Besides containing the subsampling
282 // information of all planes, this field also gives the searched motion
283 // vector for the entire block, i.e., `mbd->mi[0]->mv[0]`. This vector
284 // should be 0 if the `ref_frame` itself is the frame to be filtered.
285 // block_size: Size of the block.
286 // mb_row: Row index of the block in the entire frame.
287 // mb_col: Column index of the block in the entire frame.
288 // num_planes: Number of planes in the frame.
289 // scale: Scaling factor.
290 // use_subblock: Whether to use 4 sub-blocks to replace the original block.
291 // subblock_mvs: The motion vectors for each sub-block (row-major order).
292 // pred: Pointer to the predictor to build.
293 // Returns:
294 // Nothing will be returned. But the content to which `pred` points will be
295 // modified.
static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
                               const MACROBLOCKD *mbd,
                               const BLOCK_SIZE block_size, const int mb_row,
                               const int mb_col, const int num_planes,
                               const struct scale_factors *scale,
                               const int use_subblock, const MV *subblock_mvs,
                               uint8_t *pred) {
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Information of the entire block.
  const int mb_height = block_size_high[block_size];  // Height.
  const int mb_width = block_size_wide[block_size];   // Width.
  const int mb_pels = mb_height * mb_width;           // Number of pixels.
  const int mb_y = mb_height * mb_row;                // Y-coord (Top-left).
  const int mb_x = mb_width * mb_col;                 // X-coord (Top-left).
  const int bit_depth = mbd->bd;                      // Bit depth.
  const int is_intrabc = 0;                           // Is intra-copied?
  const int mb_mv_row = mbd->mi[0]->mv[0].as_mv.row;  // Motion vector (y).
  const int mb_mv_col = mbd->mi[0]->mv[0].as_mv.col;  // Motion vector (x).
  const MV mb_mv = { (int16_t)mb_mv_row, (int16_t)mb_mv_col };
  const int is_high_bitdepth = is_frame_high_bitdepth(ref_frame);

  // Information of each sub-block (actually in use).
  // When splitting, each side is halved, i.e. 2x2 sub-blocks.
  const int num_blocks = use_subblock ? 2 : 1;  // Num of blocks on each side.
  const int block_height = mb_height >> (num_blocks - 1);  // Height.
  const int block_width = mb_width >> (num_blocks - 1);    // Width.

  // Default interpolation filters.
  const int_interpfilters interp_filters =
      av1_broadcast_interp_filter(MULTITAP_SHARP);

  // Handle Y-plane, U-plane and V-plane (if needed) in sequence.
  int plane_offset = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const int subsampling_y = mbd->plane[plane].subsampling_y;
    const int subsampling_x = mbd->plane[plane].subsampling_x;
    // Information of each sub-block in current plane.
    const int plane_h = mb_height >> subsampling_y;  // Plane height.
    const int plane_w = mb_width >> subsampling_x;   // Plane width.
    const int plane_y = mb_y >> subsampling_y;       // Y-coord (Top-left).
    const int plane_x = mb_x >> subsampling_x;       // X-coord (Top-left).
    const int h = block_height >> subsampling_y;     // Sub-block height.
    const int w = block_width >> subsampling_x;      // Sub-block width.
    const int is_y_plane = (plane == 0);             // Is Y-plane?

    // NOTE(review): `widths`/`heights`/`strides` are indexed [0] for Y and
    // [1] for both chroma planes — assumes U and V share dimensions; this
    // matches YV12_BUFFER_CONFIG's layout.
    const struct buf_2d ref_buf = { NULL, ref_frame->buffers[plane],
                                    ref_frame->widths[is_y_plane ? 0 : 1],
                                    ref_frame->heights[is_y_plane ? 0 : 1],
                                    ref_frame->strides[is_y_plane ? 0 : 1] };

    // Handle entire block or sub-blocks if needed.
    int subblock_idx = 0;
    for (int i = 0; i < plane_h; i += h) {
      for (int j = 0; j < plane_w; j += w) {
        // Choose proper motion vector.
        const MV mv = use_subblock ? subblock_mvs[subblock_idx] : mb_mv;
        assert(mv.row >= INT16_MIN && mv.row <= INT16_MAX &&
               mv.col >= INT16_MIN && mv.col <= INT16_MAX);

        const int y = plane_y + i;
        const int x = plane_x + j;

        // Build predictor for each sub-block on current plane.
        InterPredParams inter_pred_params;
        av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
                              subsampling_y, bit_depth, is_high_bitdepth,
                              is_intrabc, scale, &ref_buf, interp_filters);
        inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
        av1_enc_build_one_inter_predictor(&pred[plane_offset + i * plane_w + j],
                                          plane_w, &mv, &inter_pred_params);

        ++subblock_idx;
      }
    }
    // Each plane is given a full `mb_pels` slice of `pred`, regardless of
    // subsampling.
    plane_offset += mb_pels;
  }
}
373
374 // Computes temporal filter weights and accumulators for the frame to be
375 // filtered. More concretely, the filter weights for all pixels are the same.
376 // Inputs:
377 // mbd: Pointer to the block for filtering, which is ONLY used to get
378 // subsampling information of all planes as well as the bit-depth.
379 // block_size: Size of the block.
380 // num_planes: Number of planes in the frame.
381 // filter_weight: Weight used for filtering.
382 // pred: Pointer to the well-built predictors.
383 // accum: Pointer to the pixel-wise accumulator for filtering.
//   count: Pointer to the pixel-wise counter for filtering.
385 // Returns:
386 // Nothing will be returned. But the content to which `accum` and `pred`
387 // point will be modified.
av1_apply_temporal_filter_self(const MACROBLOCKD * mbd,const BLOCK_SIZE block_size,const int num_planes,const int filter_weight,const uint8_t * pred,uint32_t * accum,uint16_t * count)388 void av1_apply_temporal_filter_self(const MACROBLOCKD *mbd,
389 const BLOCK_SIZE block_size,
390 const int num_planes,
391 const int filter_weight,
392 const uint8_t *pred, uint32_t *accum,
393 uint16_t *count) {
394 assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
395
396 // Block information.
397 const int mb_height = block_size_high[block_size];
398 const int mb_width = block_size_wide[block_size];
399 const int mb_pels = mb_height * mb_width;
400 const int is_high_bitdepth = is_cur_buf_hbd(mbd);
401 const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);
402
403 int plane_offset = 0;
404 for (int plane = 0; plane < num_planes; ++plane) {
405 const int subsampling_y = mbd->plane[plane].subsampling_y;
406 const int subsampling_x = mbd->plane[plane].subsampling_x;
407 const int h = mb_height >> subsampling_y; // Plane height.
408 const int w = mb_width >> subsampling_x; // Plane width.
409
410 int pred_idx = 0;
411 for (int i = 0; i < h; ++i) {
412 for (int j = 0; j < w; ++j) {
413 const int idx = plane_offset + pred_idx; // Index with plane shift.
414 const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
415 accum[idx] += filter_weight * pred_value;
416 count[idx] += filter_weight;
417 ++pred_idx;
418 }
419 }
420 plane_offset += mb_pels;
421 }
422 }
423
424 // Function to compute pixel-wise squared difference between two buffers.
425 // Inputs:
426 // ref: Pointer to reference buffer.
427 // ref_offset: Start position of reference buffer for computation.
428 // ref_stride: Stride for reference buffer.
429 // tgt: Pointer to target buffer.
430 // tgt_offset: Start position of target buffer for computation.
431 // tgt_stride: Stride for target buffer.
432 // height: Height of block for computation.
433 // width: Width of block for computation.
434 // is_high_bitdepth: Whether the two buffers point to high bit-depth frames.
//   square_diff: Pointer to save the squared differences.
436 // Returns:
437 // Nothing will be returned. But the content to which `square_diff` points
438 // will be modified.
compute_square_diff(const uint8_t * ref,const int ref_offset,const int ref_stride,const uint8_t * tgt,const int tgt_offset,const int tgt_stride,const int height,const int width,const int is_high_bitdepth,uint32_t * square_diff)439 static INLINE void compute_square_diff(const uint8_t *ref, const int ref_offset,
440 const int ref_stride, const uint8_t *tgt,
441 const int tgt_offset,
442 const int tgt_stride, const int height,
443 const int width,
444 const int is_high_bitdepth,
445 uint32_t *square_diff) {
446 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
447 const uint16_t *tgt16 = CONVERT_TO_SHORTPTR(tgt);
448
449 int ref_idx = 0;
450 int tgt_idx = 0;
451 int idx = 0;
452 for (int i = 0; i < height; ++i) {
453 for (int j = 0; j < width; ++j) {
454 const uint16_t ref_value = is_high_bitdepth ? ref16[ref_offset + ref_idx]
455 : ref[ref_offset + ref_idx];
456 const uint16_t tgt_value = is_high_bitdepth ? tgt16[tgt_offset + tgt_idx]
457 : tgt[tgt_offset + tgt_idx];
458 const uint32_t diff = (ref_value > tgt_value) ? (ref_value - tgt_value)
459 : (tgt_value - ref_value);
460 square_diff[idx] = diff * diff;
461
462 ++ref_idx;
463 ++tgt_idx;
464 ++idx;
465 }
466 ref_idx += (ref_stride - width);
467 tgt_idx += (tgt_stride - width);
468 }
469 }
470
471 // Function to adjust the filter weight when use YUV strategy.
472 // Inputs:
473 // filter_weight: Original filter weight.
474 // sum_square_diff: Sum of squared difference between input frame and
475 // prediction. This field is computed pixel by pixel, and
476 // is used as a reference for the filter weight adjustment.
477 // num_ref_pixels: Number of pixels used to compute the `sum_square_diff`.
478 // This field should align with the above lookup tables
479 // `filter_weight_adjustment_lookup_table_yuv` and
480 // `highbd_filter_weight_adjustment_lookup_table_yuv`.
481 // strength: Strength for filter weight adjustment.
482 // Returns:
483 // Adjusted filter weight which will finally be used for filtering.
adjust_filter_weight_yuv(const int filter_weight,const uint64_t sum_square_diff,const int num_ref_pixels,const int strength)484 static INLINE int adjust_filter_weight_yuv(const int filter_weight,
485 const uint64_t sum_square_diff,
486 const int num_ref_pixels,
487 const int strength) {
488 int modifier =
489 (int)(AOMMIN(sum_square_diff * TF_YUV_FILTER_WEIGHT_SCALE, INT32_MAX)) /
490 num_ref_pixels;
491 const int rounding = (1 << strength) >> 1;
492 modifier = (modifier + rounding) >> strength;
493 return (modifier >= 16) ? 0 : (16 - modifier) * filter_weight;
494 }
495
496 // Applies temporal filter with YUV strategy.
497 // Inputs:
498 // frame_to_filter: Pointer to the frame to be filtered, which is used as
//                    reference to compute squared difference from the predictor.
500 // mbd: Pointer to the block for filtering, which is ONLY used to get
501 // subsampling information of all YUV planes.
502 // block_size: Size of the block.
503 // mb_row: Row index of the block in the entire frame.
504 // mb_col: Column index of the block in the entire frame.
505 // num_planes: Number of planes in the frame.
506 // strength: Strength for filter weight adjustment.
507 // use_subblock: Whether to use 4 sub-blocks to replace the original block.
508 // subblock_filter_weights: The filter weights for each sub-block (row-major
509 // order). If `use_subblock` is set as 0, the first
510 // weight will be applied to the entire block.
511 // pred: Pointer to the well-built predictors.
512 // accum: Pointer to the pixel-wise accumulator for filtering.
//   count: Pointer to the pixel-wise counter for filtering.
514 // Returns:
515 // Nothing will be returned. But the content to which `accum` and `pred`
516 // point will be modified.
void av1_apply_temporal_filter_yuv_c(
    const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
    const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
    const int num_planes, const int strength, const int use_subblock,
    const int *subblock_filter_weights, const uint8_t *pred, uint32_t *accum,
    uint16_t *count) {
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Block information.
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
  const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);

  // Allocate memory for pixel-wise squared differences for all planes. They,
  // regardless of the subsampling, are assigned with memory of size `mb_pels`.
  // NOTE(review): the allocation result is used unchecked — `aom_memalign`
  // can return NULL on OOM; confirm whether callers guarantee allocation
  // success or whether a NULL check should be added.
  uint32_t *square_diff =
      aom_memalign(16, num_planes * mb_pels * sizeof(uint32_t));
  memset(square_diff, 0, num_planes * mb_pels * sizeof(square_diff[0]));

  // Pass 1: pre-compute pixel-wise squared differences between the to-filter
  // frame and the predictor for every plane.
  int plane_offset = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    // Locate pixel on reference frame.
    const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
    const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
    const int frame_stride = frame_to_filter->strides[plane == 0 ? 0 : 1];
    const int frame_offset = mb_row * plane_h * frame_stride + mb_col * plane_w;
    const uint8_t *ref = frame_to_filter->buffers[plane];
    compute_square_diff(ref, frame_offset, frame_stride, pred, plane_offset,
                        plane_w, plane_h, plane_w, is_high_bitdepth,
                        square_diff + plane_offset);
    plane_offset += mb_pels;
  }

  // Get window size for pixel-wise filtering.
  assert(TF_YUV_FILTER_WINDOW_LENGTH % 2 == 1);
  const int half_window = TF_YUV_FILTER_WINDOW_LENGTH >> 1;

  // Pass 2: per-pixel filtering. Handle planes in sequence.
  plane_offset = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const int subsampling_y = mbd->plane[plane].subsampling_y;
    const int subsampling_x = mbd->plane[plane].subsampling_x;
    const int h = mb_height >> subsampling_y;  // Plane height.
    const int w = mb_width >> subsampling_x;   // Plane width.

    // Perform filtering.
    int pred_idx = 0;
    for (int i = 0; i < h; ++i) {
      for (int j = 0; j < w; ++j) {
        // non-local mean approach: sum squared differences over a small
        // window centered at (i, j), clipped to the plane boundaries.
        uint64_t sum_square_diff = 0;
        int num_ref_pixels = 0;

        for (int wi = -half_window; wi <= half_window; ++wi) {
          for (int wj = -half_window; wj <= half_window; ++wj) {
            const int y = i + wi;  // Y-coord on the current plane.
            const int x = j + wj;  // X-coord on the current plane.
            if (y >= 0 && y < h && x >= 0 && x < w) {
              sum_square_diff += square_diff[plane_offset + y * w + x];
              ++num_ref_pixels;
            }
          }
        }

        // Cross-plane aggregation: each pixel also accounts for the
        // co-located error on the other planes.
        if (plane == 0) {  // Filter Y-plane using both U-plane and V-plane.
          for (int p = 1; p < num_planes; ++p) {
            const int ss_y_shift = mbd->plane[p].subsampling_y - subsampling_y;
            const int ss_x_shift = mbd->plane[p].subsampling_x - subsampling_x;
            const int yy = i >> ss_y_shift;  // Y-coord on UV-plane.
            const int xx = j >> ss_x_shift;  // X-coord on UV-plane.
            const int ww = w >> ss_x_shift;  // Width of UV-plane.
            sum_square_diff += square_diff[p * mb_pels + yy * ww + xx];
            ++num_ref_pixels;
          }
        } else {  // Filter U-plane and V-plane using Y-plane.
          const int ss_y_shift = subsampling_y - mbd->plane[0].subsampling_y;
          const int ss_x_shift = subsampling_x - mbd->plane[0].subsampling_x;
          // One chroma pixel maps onto a (1 << ss_y_shift) x (1 << ss_x_shift)
          // group of luma pixels; include all of them.
          for (int ii = 0; ii < (1 << ss_y_shift); ++ii) {
            for (int jj = 0; jj < (1 << ss_x_shift); ++jj) {
              const int yy = (i << ss_y_shift) + ii;  // Y-coord on Y-plane.
              const int xx = (j << ss_x_shift) + jj;  // X-coord on Y-plane.
              const int ww = w << ss_x_shift;         // Width of Y-plane.
              sum_square_diff += square_diff[yy * ww + xx];
              ++num_ref_pixels;
            }
          }
        }

        // Base filter weight estimated by motion search error. Sub-block
        // index is derived from which quadrant (i, j) falls in.
        const int subblock_idx =
            use_subblock ? (i >= h / 2) * 2 + (j >= w / 2) : 0;
        const int filter_weight = subblock_filter_weights[subblock_idx];

        const int idx = plane_offset + pred_idx;  // Index with plane shift.
        const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
        const int adjusted_weight = adjust_filter_weight_yuv(
            filter_weight, sum_square_diff, num_ref_pixels, strength);
        accum[idx] += adjusted_weight * pred_value;
        count[idx] += adjusted_weight;

        ++pred_idx;
      }
    }
    plane_offset += mb_pels;
  }

  aom_free(square_diff);
}
627
628 // Applies temporal filter with plane-wise strategy.
629 // The strategy of filter weight adjustment is different from the function
630 // `av1_apply_temporal_filter_yuv_c()`.
631 // Inputs:
632 // frame_to_filter: Pointer to the frame to be filtered, which is used as
//                    reference to compute squared difference from the predictor.
634 // mbd: Pointer to the block for filtering, which is ONLY used to get
635 // subsampling information of all planes.
636 // block_size: Size of the block.
637 // mb_row: Row index of the block in the entire frame.
638 // mb_col: Column index of the block in the entire frame.
639 // num_planes: Number of planes in the frame.
640 // noise_levels: Pointer to the noise levels of the to-filter frame, estimated
641 // with each plane (in Y, U, V order).
642 // use_subblock: Whether to use 4 sub-blocks to replace the original block.
643 // block_mse: Motion search error (MSE) for the entire block.
644 // subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
645 // q_factor: Quantization factor. This is actually the `q` defined in libaom,
646 // which is converted from `qindex`.
647 // pred: Pointer to the well-built predictors.
648 // accum: Pointer to the pixel-wise accumulator for filtering.
//   count: Pointer to the pixel-wise counter for filtering.
650 // Returns:
651 // Nothing will be returned. But the content to which `accum` and `pred`
652 // point will be modified.
void av1_apply_temporal_filter_planewise_c(
    const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
    const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
    const int num_planes, const double *noise_levels, const int use_subblock,
    const int block_mse, const int *subblock_mses, const int q_factor,
    const uint8_t *pred, uint32_t *accum, uint16_t *count) {
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Block information.
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
  const uint16_t *pred16 = CONVERT_TO_SHORTPTR(pred);

  // Allocate memory for pixel-wise squared differences for all planes. They,
  // regardless of the subsampling, are assigned with memory of size `mb_pels`.
  uint32_t *square_diff =
      aom_memalign(16, num_planes * mb_pels * sizeof(uint32_t));
  // On allocation failure, skip this reference frame instead of dereferencing
  // NULL. `accum`/`count` are left untouched, so filtering degrades gracefully.
  if (!square_diff) return;
  memset(square_diff, 0, num_planes * mb_pels * sizeof(square_diff[0]));

  int plane_offset = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    // Locate pixel on reference frame.
    const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
    const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
    const int frame_stride = frame_to_filter->strides[plane == 0 ? 0 : 1];
    const int frame_offset = mb_row * plane_h * frame_stride + mb_col * plane_w;
    const uint8_t *ref = frame_to_filter->buffers[plane];
    compute_square_diff(ref, frame_offset, frame_stride, pred, plane_offset,
                        plane_w, plane_h, plane_w, is_high_bitdepth,
                        square_diff + plane_offset);
    plane_offset += mb_pels;
  }

  // Get window size for pixel-wise filtering.
  assert(TF_PLANEWISE_FILTER_WINDOW_LENGTH % 2 == 1);
  const int half_window = TF_PLANEWISE_FILTER_WINDOW_LENGTH >> 1;

  // Hyper-parameter for filter weight adjustment.
  const int frame_height = frame_to_filter->heights[0]
                           << mbd->plane[0].subsampling_y;
  const int decay_control = frame_height >= 720 ? 4 : 3;

  // Handle planes in sequence.
  plane_offset = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const int subsampling_y = mbd->plane[plane].subsampling_y;
    const int subsampling_x = mbd->plane[plane].subsampling_x;
    const int h = mb_height >> subsampling_y;  // Plane height.
    const int w = mb_width >> subsampling_x;   // Plane width.

    // Perform filtering.
    int pred_idx = 0;
    for (int i = 0; i < h; ++i) {
      for (int j = 0; j < w; ++j) {
        // non-local mean approach
        uint64_t sum_square_diff = 0;
        int num_ref_pixels = 0;

        for (int wi = -half_window; wi <= half_window; ++wi) {
          for (int wj = -half_window; wj <= half_window; ++wj) {
            const int y = CLIP(i + wi, 0, h - 1);  // Y-coord on current plane.
            const int x = CLIP(j + wj, 0, w - 1);  // X-coord on current plane.
            sum_square_diff += square_diff[plane_offset + y * w + x];
            ++num_ref_pixels;
          }
        }

        // Filter U-plane and V-plane using Y-plane. This is because motion
        // search is only done on Y-plane, so the information from Y-plane will
        // be more accurate.
        if (plane != 0) {
          const int ss_y_shift = subsampling_y - mbd->plane[0].subsampling_y;
          const int ss_x_shift = subsampling_x - mbd->plane[0].subsampling_x;
          for (int ii = 0; ii < (1 << ss_y_shift); ++ii) {
            for (int jj = 0; jj < (1 << ss_x_shift); ++jj) {
              const int yy = (i << ss_y_shift) + ii;  // Y-coord on Y-plane.
              const int xx = (j << ss_x_shift) + jj;  // X-coord on Y-plane.
              const int ww = w << ss_x_shift;         // Width of Y-plane.
              sum_square_diff += square_diff[yy * ww + xx];
              ++num_ref_pixels;
            }
          }
        }

        // Scale down the difference for high bit depth input. Pixel values
        // carry a scale of 2^(bd-8) relative to 8-bit input, so squared
        // differences carry 2^(2*(bd-8)): the correct normalizing shift is
        // 2*(bd-8). The previous `(bd-8)*(bd-8)` coincides at 10-bit but
        // over-shifts at 12-bit, making errors look artificially small.
        if (mbd->bd > 8) sum_square_diff >>= (mbd->bd - 8) * 2;
        const double window_error = (double)(sum_square_diff) / num_ref_pixels;
        const int subblock_idx = (i >= h / 2) * 2 + (j >= w / 2);
        const double block_error =
            (double)(use_subblock ? subblock_mses[subblock_idx] : block_mse);

        // Control factor for non-local mean approach.
        const double r =
            (double)decay_control * (0.7 + log(noise_levels[plane] + 1.0));
        const double q = AOMMIN((double)(q_factor * q_factor) / 256.0, 1);

        // Compute filter weight.
        const double scaled_diff =
            AOMMAX(-(window_error + block_error / 10) / (2 * r * r * q), -15.0);
        const int adjusted_weight =
            (int)(exp(scaled_diff) * TF_PLANEWISE_FILTER_WEIGHT_SCALE);

        const int idx = plane_offset + pred_idx;  // Index with plane shift.
        const int pred_value = is_high_bitdepth ? pred16[idx] : pred[idx];
        accum[idx] += adjusted_weight * pred_value;
        count[idx] += adjusted_weight;

        ++pred_idx;
      }
    }
    plane_offset += mb_pels;
  }

  aom_free(square_diff);
}
770
771 // Computes temporal filter weights and accumulators from all reference frames
772 // excluding the current frame to be filtered.
773 // Inputs:
774 // frame_to_filter: Pointer to the frame to be filtered, which is used as
775 //                    reference to compute squared difference from the predictor.
776 // mbd: Pointer to the block for filtering, which is ONLY used to get
777 // subsampling information of all planes and the bit-depth.
778 // block_size: Size of the block.
779 // mb_row: Row index of the block in the entire frame.
780 // mb_col: Column index of the block in the entire frame.
781 // num_planes: Number of planes in the frame.
782 // strength: Strength for filter weight adjustment. (Used in YUV strategy)
783 // use_subblock: Whether to use 4 sub-blocks to replace the original block.
784 // (Used in YUV strategy)
785 // subblock_filter_weights: The filter weights for each sub-block (row-major
786 // order). If `use_subblock` is set as 0, the first
787 // weight will be applied to the entire block. (Used
788 // in YUV strategy)
789 // noise_levels: Pointer to the noise levels of the to-filter frame, estimated
790 // with each plane (in Y, U, V order). (Used in plane-wise
791 // strategy)
792 // block_mse: Motion search error (MSE) for the entire block.
793 // subblock_mses: Pointer to the search errors (MSE) for 4 sub-blocks.
794 // q_factor: Quantization factor.
795 // pred: Pointer to the well-built predictors.
796 // accum: Pointer to the pixel-wise accumulator for filtering.
797 //   count: Pointer to the pixel-wise counter for filtering.
798 // Returns:
799 // Nothing will be returned. But the content to which `accum` and `pred`
800 // point will be modified.
void av1_apply_temporal_filter_others(
    const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
    const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
    const int num_planes, const int strength, const int use_subblock,
    const int *subblock_filter_weights, const double *noise_levels,
    const int block_mse, const int *subblock_mses, const int q_factor,
    const uint8_t *pred, uint32_t *accum, uint16_t *count) {
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  if (TF_ENABLE_PLANEWISE_STRATEGY) {
    // Route high-bitdepth or non-32x32 blocks to the C implementation.
    // TODO(any): avx2 and sse2 version should be changed to align with C
    // function before using.
    const int use_c_impl =
        is_frame_high_bitdepth(frame_to_filter) || block_size != BLOCK_32X32;
    if (use_c_impl) {
      av1_apply_temporal_filter_planewise_c(
          frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
          noise_levels, use_subblock, block_mse, subblock_mses, q_factor, pred,
          accum, count);
    } else {
      av1_apply_temporal_filter_planewise(
          frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
          noise_levels, use_subblock, block_mse, subblock_mses, q_factor, pred,
          accum, count);
    }
    return;
  }

  // YUV strategy, commonly used for low-resolution video.
  // Nothing to accumulate when every sub-block filter weight is zero.
  int has_nonzero_weight = 0;
  for (int i = 0; i < 4; ++i) {
    has_nonzero_weight |= (subblock_filter_weights[i] != 0);
  }
  if (!has_nonzero_weight) return;

  const int adj_strength = strength + 2 * (mbd->bd - 8);
  const int use_yuv_simd = (num_planes == 3 &&
                            TF_YUV_FILTER_WEIGHT_SCALE == 3 &&
                            block_size != BLOCK_32X32);
  if (use_yuv_simd) {
    av1_apply_temporal_filter_yuv(frame_to_filter, mbd, block_size, mb_row,
                                  mb_col, num_planes, adj_strength,
                                  use_subblock, subblock_filter_weights, pred,
                                  accum, count);
  } else {
    // TODO(any): sse4 version should be changed to align with C function
    // before using.
    av1_apply_temporal_filter_yuv_c(frame_to_filter, mbd, block_size, mb_row,
                                    mb_col, num_planes, adj_strength,
                                    use_subblock, subblock_filter_weights,
                                    pred, accum, count);
  }
}
846
847 // Normalizes the accumulated filtering result to produce the filtered frame.
848 // Inputs:
849 // mbd: Pointer to the block for filtering, which is ONLY used to get
850 // subsampling information of all planes.
851 // block_size: Size of the block.
852 // mb_row: Row index of the block in the entire frame.
853 // mb_col: Column index of the block in the entire frame.
854 // num_planes: Number of planes in the frame.
855 // accum: Pointer to the pre-computed accumulator.
856 // count: Pointer to the pre-computed count.
857 // result_buffer: Pointer to result buffer.
858 // Returns:
859 // Nothing will be returned. But the content to which `result_buffer` point
860 // will be modified.
static void tf_normalize_filtered_frame(
    const MACROBLOCKD *mbd, const BLOCK_SIZE block_size, const int mb_row,
    const int mb_col, const int num_planes, const uint32_t *accum,
    const uint16_t *count, YV12_BUFFER_CONFIG *result_buffer) {
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Block information.
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_pels = mb_height * mb_width;
  const int is_high_bitdepth = is_frame_high_bitdepth(result_buffer);

  for (int plane = 0; plane < num_planes; ++plane) {
    const int plane_h = mb_height >> mbd->plane[plane].subsampling_y;
    const int plane_w = mb_width >> mbd->plane[plane].subsampling_x;
    const int stride = result_buffer->strides[plane == 0 ? 0 : 1];
    // Top-left pixel of this block on the current plane of the output frame.
    const int frame_base = mb_row * plane_h * stride + mb_col * plane_w;
    uint8_t *const dst8 = result_buffer->buffers[plane];
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst8);
    const int plane_offset = plane * mb_pels;

    for (int row = 0; row < plane_h; ++row) {
      for (int col = 0; col < plane_w; ++col) {
        const int src_idx = plane_offset + row * plane_w + col;
        const int dst_idx = frame_base + row * stride + col;
        // Rounded division: accumulated value over accumulated weight.
        const uint16_t rounding = count[src_idx] >> 1;
        const uint32_t value =
            OD_DIVU(accum[src_idx] + rounding, count[src_idx]);
        if (is_high_bitdepth) {
          dst16[dst_idx] = (uint16_t)value;
        } else {
          dst8[dst_idx] = (uint8_t)value;
        }
      }
    }
  }
}
902
903 // Helper function to compute number of blocks on either side of the frame.
get_num_blocks(const int frame_length,const int mb_length)904 static INLINE int get_num_blocks(const int frame_length, const int mb_length) {
905 return (frame_length + mb_length - 1) / mb_length;
906 }
907
// Accumulated difference between the filtered frame and the original frame,
// gathered block by block during filtering.
typedef struct {
  int64_t sum;  // Sum of the per-block SSE values (filtered vs. source).
  int64_t sse;  // Sum of the squared per-block SSE values (for variance).
} FRAME_DIFF;
912
913 // Does temporal filter for a particular frame.
914 // Inputs:
915 // cpi: Pointer to the composed information of input video.
916 // frames: Frame buffers used for temporal filtering.
917 // num_frames: Number of frames in the frame buffer.
918 // filter_frame_idx: Index of the frame to be filtered.
919 //   is_key_frame: Whether the to-filter frame is a key frame.
920 // is_second_arf: Whether the to-filter frame is the second ARF. This field
921 // is ONLY used for assigning filter weight.
922 // block_size: Block size used for temporal filtering.
923 // scale: Scaling factor.
924 // strength: Pre-estimated strength for filter weight adjustment.
925 // noise_levels: Pointer to the noise levels of the to-filter frame, estimated
926 // with each plane (in Y, U, V order).
927 // Returns:
928 // Difference between filtered frame and the original frame.
tf_do_filtering(AV1_COMP * cpi,YV12_BUFFER_CONFIG ** frames,const int num_frames,const int filter_frame_idx,const int is_key_frame,const int is_second_arf,const BLOCK_SIZE block_size,const struct scale_factors * scale,const int strength,const double * noise_levels)929 static FRAME_DIFF tf_do_filtering(
930 AV1_COMP *cpi, YV12_BUFFER_CONFIG **frames, const int num_frames,
931 const int filter_frame_idx, const int is_key_frame, const int is_second_arf,
932 const BLOCK_SIZE block_size, const struct scale_factors *scale,
933 const int strength, const double *noise_levels) {
934 // Basic information.
935 const YV12_BUFFER_CONFIG *const frame_to_filter = frames[filter_frame_idx];
936 const int frame_height = frame_to_filter->y_crop_height;
937 const int frame_width = frame_to_filter->y_crop_width;
938 const int mb_height = block_size_high[block_size];
939 const int mb_width = block_size_wide[block_size];
940 const int mb_pels = mb_height * mb_width;
941 const int mb_rows = get_num_blocks(frame_height, mb_height);
942 const int mb_cols = get_num_blocks(frame_width, mb_width);
943 const int num_planes = av1_num_planes(&cpi->common);
944 const int mi_h = mi_size_high_log2[block_size];
945 const int mi_w = mi_size_wide_log2[block_size];
946 assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
947 const int is_high_bitdepth = is_frame_high_bitdepth(frame_to_filter);
948
949 // Save input state.
950 MACROBLOCK *const mb = &cpi->td.mb;
951 MACROBLOCKD *const mbd = &mb->e_mbd;
952 uint8_t *input_buffer[MAX_MB_PLANE];
953 for (int i = 0; i < num_planes; i++) {
954 input_buffer[i] = mbd->plane[i].pre[0].buf;
955 }
956 MB_MODE_INFO **input_mb_mode_info = mbd->mi;
957
958 // Setup.
959 mbd->block_ref_scale_factors[0] = scale;
960 mbd->block_ref_scale_factors[1] = scale;
961 // A temporary block info used to store state in temporal filtering process.
962 MB_MODE_INFO *tmp_mb_mode_info = (MB_MODE_INFO *)malloc(sizeof(MB_MODE_INFO));
963 memset(tmp_mb_mode_info, 0, sizeof(MB_MODE_INFO));
964 mbd->mi = &tmp_mb_mode_info;
965 mbd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
966 // Allocate memory for predictor, accumulator and count.
967 uint8_t *pred8 = aom_memalign(32, num_planes * mb_pels * sizeof(uint8_t));
968 uint16_t *pred16 = aom_memalign(32, num_planes * mb_pels * sizeof(uint16_t));
969 uint32_t *accum = aom_memalign(16, num_planes * mb_pels * sizeof(uint32_t));
970 uint16_t *count = aom_memalign(16, num_planes * mb_pels * sizeof(uint16_t));
971 memset(pred8, 0, num_planes * mb_pels * sizeof(pred8[0]));
972 memset(pred16, 0, num_planes * mb_pels * sizeof(pred16[0]));
973 uint8_t *const pred = is_high_bitdepth ? CONVERT_TO_BYTEPTR(pred16) : pred8;
974
975 // Do filtering.
976 FRAME_DIFF diff = { 0, 0 };
977 // Perform temporal filtering block by block.
978 for (int mb_row = 0; mb_row < mb_rows; mb_row++) {
979 av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
980 (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
981 cpi->oxcf.border_in_pixels);
982 for (int mb_col = 0; mb_col < mb_cols; mb_col++) {
983 av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
984 (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
985 cpi->oxcf.border_in_pixels);
986 memset(accum, 0, num_planes * mb_pels * sizeof(accum[0]));
987 memset(count, 0, num_planes * mb_pels * sizeof(count[0]));
988 MV ref_mv = kZeroMv; // Reference motion vector passed down along frames.
989 // Perform temporal filtering frame by frame.
990 for (int frame = 0; frame < num_frames; frame++) {
991 if (frames[frame] == NULL) continue;
992
993 // Motion search.
994 MV subblock_mvs[4] = { kZeroMv, kZeroMv, kZeroMv, kZeroMv };
995 int subblock_filter_weights[4] = { 0, 0, 0, 0 };
996 int block_mse = INT_MAX;
997 int subblock_mses[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
998
999 if (frame == filter_frame_idx) { // Frame to be filtered.
1000 // Set motion vector as 0 for the frame to be filtered.
1001 mbd->mi[0]->mv[0].as_mv = kZeroMv;
1002 // Change ref_mv sign for following frames.
1003 ref_mv.row *= -1;
1004 ref_mv.col *= -1;
1005 } else { // Other reference frames.
1006 block_mse = tf_motion_search(cpi, frame_to_filter, frames[frame],
1007 block_size, mb_row, mb_col, &ref_mv,
1008 subblock_mvs, subblock_mses);
1009 // Do not pass down the reference motion vector if error is too large.
1010 const int thresh = AOMMIN(frame_height, frame_width) >= 720 ? 12 : 3;
1011 if (block_mse > (thresh << (mbd->bd - 8))) {
1012 ref_mv = kZeroMv;
1013 }
1014 }
1015
1016 // Build predictor.
1017 int use_subblock = tf_get_filter_weight(
1018 block_mse, subblock_mses, is_second_arf, subblock_filter_weights);
1019 tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
1020 num_planes, scale, use_subblock, subblock_mvs, pred);
1021
1022 // Perform weighted averaging.
1023 if (frame == filter_frame_idx) { // Frame to be filtered.
1024 av1_apply_temporal_filter_self(mbd, block_size, num_planes,
1025 subblock_filter_weights[0], pred,
1026 accum, count);
1027 } else { // Other reference frames.
1028 const FRAME_TYPE frame_type =
1029 (cpi->common.current_frame.frame_number > 1) ? INTER_FRAME
1030 : KEY_FRAME;
1031 const int q_factor =
1032 (int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[frame_type],
1033 cpi->common.seq_params.bit_depth);
1034 av1_apply_temporal_filter_others(
1035 frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
1036 strength, use_subblock, subblock_filter_weights, noise_levels,
1037 block_mse, subblock_mses, q_factor, pred, accum, count);
1038 }
1039 }
1040
1041 tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
1042 accum, count, &cpi->alt_ref_buffer);
1043
1044 if (!is_key_frame && cpi->sf.hl_sf.adaptive_overlay_encoding) {
1045 const int y_height = mb_height >> mbd->plane[0].subsampling_y;
1046 const int y_width = mb_width >> mbd->plane[0].subsampling_x;
1047 const int source_y_stride = frame_to_filter->y_stride;
1048 const int filter_y_stride = cpi->alt_ref_buffer.y_stride;
1049 const int source_offset =
1050 mb_row * y_height * source_y_stride + mb_col * y_width;
1051 const int filter_offset =
1052 mb_row * y_height * filter_y_stride + mb_col * y_width;
1053 unsigned int sse = 0;
1054 cpi->fn_ptr[block_size].vf(frame_to_filter->y_buffer + source_offset,
1055 source_y_stride,
1056 cpi->alt_ref_buffer.y_buffer + filter_offset,
1057 filter_y_stride, &sse);
1058 diff.sum += sse;
1059 diff.sse += sse * sse;
1060 }
1061 }
1062 }
1063
1064 // Restore input state
1065 for (int i = 0; i < num_planes; i++) {
1066 mbd->plane[i].pre[0].buf = input_buffer[i];
1067 }
1068 mbd->mi = input_mb_mode_info;
1069
1070 free(tmp_mb_mode_info);
1071 aom_free(pred8);
1072 aom_free(pred16);
1073 aom_free(accum);
1074 aom_free(count);
1075
1076 return diff;
1077 }
1078
1079 // A constant number, sqrt(pi / 2), used for noise estimation.
1080 static const double SQRT_PI_BY_2 = 1.25331413732;
1081
av1_estimate_noise_from_single_plane(const YV12_BUFFER_CONFIG * frame,const int plane,const int bit_depth)1082 double av1_estimate_noise_from_single_plane(const YV12_BUFFER_CONFIG *frame,
1083 const int plane,
1084 const int bit_depth) {
1085 const int is_y_plane = (plane == 0);
1086 const int height = frame->crop_heights[is_y_plane ? 0 : 1];
1087 const int width = frame->crop_widths[is_y_plane ? 0 : 1];
1088 const int stride = frame->strides[is_y_plane ? 0 : 1];
1089 const uint8_t *src = frame->buffers[plane];
1090 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
1091 const int is_high_bitdepth = is_frame_high_bitdepth(frame);
1092
1093 int64_t accum = 0;
1094 int count = 0;
1095 for (int i = 1; i < height - 1; ++i) {
1096 for (int j = 1; j < width - 1; ++j) {
1097 // Setup a small 3x3 matrix.
1098 const int center_idx = i * stride + j;
1099 int mat[3][3];
1100 for (int ii = -1; ii <= 1; ++ii) {
1101 for (int jj = -1; jj <= 1; ++jj) {
1102 const int idx = center_idx + ii * stride + jj;
1103 mat[ii + 1][jj + 1] = is_high_bitdepth ? src16[idx] : src[idx];
1104 }
1105 }
1106 // Compute sobel gradients.
1107 const int Gx = (mat[0][0] - mat[0][2]) + (mat[2][0] - mat[2][2]) +
1108 2 * (mat[1][0] - mat[1][2]);
1109 const int Gy = (mat[0][0] - mat[2][0]) + (mat[0][2] - mat[2][2]) +
1110 2 * (mat[0][1] - mat[2][1]);
1111 const int Ga = ROUND_POWER_OF_TWO(abs(Gx) + abs(Gy), bit_depth - 8);
1112 // Accumulate Laplacian.
1113 if (Ga < NOISE_ESTIMATION_EDGE_THRESHOLD) { // Only count smooth pixels.
1114 const int v = 4 * mat[1][1] -
1115 2 * (mat[0][1] + mat[2][1] + mat[1][0] + mat[1][2]) +
1116 (mat[0][0] + mat[0][2] + mat[2][0] + mat[2][2]);
1117 accum += ROUND_POWER_OF_TWO(abs(v), bit_depth - 8);
1118 ++count;
1119 }
1120 }
1121 }
1122
1123 // Return -1.0 (unreliable estimation) if there are too few smooth pixels.
1124 return (count < 16) ? -1.0 : (double)accum / (6 * count) * SQRT_PI_BY_2;
1125 }
1126
1127 // Estimates the strength for filter weight adjustment, which is used in YUV
1128 // strategy. This estimation is based on the pre-estimated noise level of the
1129 // to-filter frame.
1130 // Inputs:
1131 // cpi: Pointer to the composed information of input video.
1132 // noise_level: Noise level of the to-filter frame, estimated with Y-plane.
1133 // group_boost: Boost level for the current group of frames.
1134 // Returns:
1135 // Estimated strength which will be used for filter weight adjustment.
tf_estimate_strength(const AV1_COMP * cpi,const double noise_level,const int group_boost)1136 static int tf_estimate_strength(const AV1_COMP *cpi, const double noise_level,
1137 const int group_boost) {
1138 int strength = cpi->oxcf.arnr_strength;
1139
1140 // Adjust the strength based on the estimated noise level.
1141 if (noise_level > 0) { // Adjust when the noise level is reliable.
1142 if (noise_level < 0.75) { // Noise level lies in range (0, 0.75).
1143 strength = strength - 2;
1144 } else if (noise_level < 1.75) { // Noise level lies in range [0.75, 1.75).
1145 strength = strength - 1;
1146 } else if (noise_level < 4.0) { // Noise level lies in range [1.75, 4.0).
1147 strength = strength + 0;
1148 } else { // Noise level lies in range [4.0, +inf).
1149 strength = strength + 1;
1150 }
1151 }
1152
1153 // Adjust the strength based on active max q.
1154 const FRAME_TYPE frame_type =
1155 (cpi->common.current_frame.frame_number > 1) ? INTER_FRAME : KEY_FRAME;
1156 const int q = (int)av1_convert_qindex_to_q(
1157 cpi->rc.avg_frame_qindex[frame_type], cpi->common.seq_params.bit_depth);
1158 strength = strength - AOMMAX(0, (16 - q) / 2);
1159
1160 return CLIP(strength, 0, group_boost / 300);
1161 }
1162
1163 // Sets up the frame buffer for temporal filtering. Basically, this function
1164 // determines how many frames will be used for temporal filtering and then
1165 // groups them into a buffer.
1166 // Inputs:
1167 // cpi: Pointer to the composed information of input video.
1168 // filter_frame_lookahead_idx: The index of the to-filter frame in the
1169 // lookahead buffer `cpi->lookahead`.
1170 // is_second_arf: Whether the to-filter frame is the second ARF. This field
1171 // will affect the number of frames used for filtering.
1172 // frames: Pointer to the frame buffer to setup.
1173 // num_frames_for_filtering: Number of frames used for filtering.
1174 // filter_frame_idx: Index of the to-filter frame in the setup frame buffer.
1175 // Returns:
1176 // Nothing will be returned. But the frame buffer `frames`, number of frames
1177 // in the buffer `num_frames_for_filtering`, and the index of the to-filter
1178 // frame in the buffer `filter_frame_idx` will be updated in this function.
static void tf_setup_filtering_buffer(const AV1_COMP *cpi,
                                      const int filter_frame_lookahead_idx,
                                      const int is_second_arf,
                                      YV12_BUFFER_CONFIG **frames,
                                      int *num_frames_for_filtering,
                                      int *filter_frame_idx) {
  int frames_total = 0;    // Number of frames used for filtering.
  int frames_before = -1;  // Number of frames before the to-filter frame.
  int to_filter_offset;    // Lookahead offset of the to-filter frame.

  if (filter_frame_lookahead_idx == -1) {  // Key frame.
    frames_total = TF_NUM_FILTERING_FRAMES_FOR_KEY_FRAME;
    frames_before = 0;
    to_filter_offset = filter_frame_lookahead_idx;
  } else if (filter_frame_lookahead_idx < -1) {  // Key frame in one-pass mode.
    frames_total = TF_NUM_FILTERING_FRAMES_FOR_KEY_FRAME;
    frames_before = frames_total - 1;
    to_filter_offset = -filter_frame_lookahead_idx;
  } else {
    frames_total = cpi->oxcf.arnr_max_frames;
    // Only use 2 neighbours for the second ARF.
    if (is_second_arf) frames_total = AOMMIN(frames_total, 3);
    if (frames_total > cpi->rc.gfu_boost / 150) {
      frames_total = cpi->rc.gfu_boost / 150;
      frames_total += !(frames_total & 1);  // Keep the frame count odd.
    }
    frames_before = AOMMIN(frames_total >> 1, filter_frame_lookahead_idx + 1);
    const int lookahead_depth =
        av1_lookahead_depth(cpi->lookahead, cpi->compressor_stage);
    const int frames_after =
        AOMMIN((frames_total - 1) >> 1,
               lookahead_depth - filter_frame_lookahead_idx - 1);
    frames_total = frames_before + 1 + frames_after;
    to_filter_offset = filter_frame_lookahead_idx;
  }
  *num_frames_for_filtering = frames_total;
  *filter_frame_idx = frames_before;

  // Fill the buffer with the chosen lookahead frames (NULL when unavailable).
  for (int frame = 0; frame < frames_total; ++frame) {
    const int lookahead_idx = frame - frames_before + to_filter_offset;
    struct lookahead_entry *entry = av1_lookahead_peek(
        cpi->lookahead, lookahead_idx, cpi->compressor_stage);
    frames[frame] = (entry == NULL) ? NULL : &entry->img;
  }
}
1226
// Entry point of temporal filtering. Filters the frame indicated by
// `filter_frame_lookahead_idx` and writes the result into
// `cpi->alt_ref_buffer`.
// Inputs:
//   cpi: Pointer to the composed information of input video.
//   filter_frame_lookahead_idx: Index of the to-filter frame in the lookahead
//                               buffer (negative values indicate key frames).
//   show_existing_arf: Output flag, set to whether the filtered ARF is close
//                      enough to the source to be shown directly. May be NULL.
// Returns:
//   Whether the to-filter frame has been filtered (1) or skipped (0).
int av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
                        int *show_existing_arf) {
  // Basic information of the current frame.
  const GF_GROUP *const gf_group = &cpi->gf_group;
  const uint8_t group_idx = gf_group->index;
  const FRAME_UPDATE_TYPE update_type = gf_group->update_type[group_idx];
  // Filter one more ARF if the lookahead index is leq 7 (w.r.t. 9-th frame).
  // This frame is ALWAYS a show existing frame.
  const int is_second_arf = (update_type == INTNL_ARF_UPDATE) &&
                            (filter_frame_lookahead_idx >= 7) &&
                            cpi->sf.hl_sf.second_alt_ref_filtering;
  // TODO(anyone): Currently, we enforce the filtering strength on internal
  // ARFs except the second ARF to be zero. We should investigate in which case
  // it is more beneficial to use non-zero strength filtering.
  if (update_type == INTNL_ARF_UPDATE && !is_second_arf) {
    return 0;
  }

  // TODO(yunqing): For INTNL_ARF_UPDATE type, the following me initialization
  // is used somewhere unexpectedly. Should be resolved later.
  // Initialize errorperbit, sadperbit16 and sadperbit4.
  const int rdmult = av1_compute_rd_mult_based_on_qindex(cpi, TF_QINDEX);
  set_error_per_bit(&cpi->td.mb, rdmult);
  av1_initialize_me_consts(cpi, &cpi->td.mb, TF_QINDEX);
  av1_fill_mv_costs(cpi->common.fc,
                    cpi->common.features.cur_frame_force_integer_mv,
                    cpi->common.features.allow_high_precision_mv, &cpi->td.mb);

  // Setup frame buffer for filtering.
  YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
  int num_frames_for_filtering = 0;
  int filter_frame_idx = -1;
  tf_setup_filtering_buffer(cpi, filter_frame_lookahead_idx, is_second_arf,
                            frames, &num_frames_for_filtering,
                            &filter_frame_idx);

  // Estimate noise (per plane) and the filtering strength.
  const int bit_depth = cpi->common.seq_params.bit_depth;
  const int num_planes = av1_num_planes(&cpi->common);
  double noise_levels[MAX_MB_PLANE] = { 0 };
  for (int plane = 0; plane < num_planes; ++plane) {
    noise_levels[plane] = av1_estimate_noise_from_single_plane(
        frames[filter_frame_idx], plane, bit_depth);
  }
  const int strength =
      tf_estimate_strength(cpi, noise_levels[0], cpi->rc.gfu_boost);
  if (filter_frame_lookahead_idx >= 0) {
    cpi->common.showable_frame =
        (strength == 0 && num_frames_for_filtering == 1) || is_second_arf ||
        (cpi->oxcf.enable_overlay == 0 || cpi->sf.hl_sf.disable_overlay_frames);
  }

  // Do filtering.
  const int is_key_frame = (filter_frame_lookahead_idx < 0);
  FRAME_DIFF diff = { 0, 0 };
  if (num_frames_for_filtering > 0 && frames[0] != NULL) {
    // Setup scaling factors. Scaling on each of the arnr frames is not
    // supported.
    // ARF is produced at the native frame size and resized when coded.
    struct scale_factors sf;
    av1_setup_scale_factors_for_frame(
        &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
        frames[0]->y_crop_width, frames[0]->y_crop_height);
    diff = tf_do_filtering(cpi, frames, num_frames_for_filtering,
                           filter_frame_idx, is_key_frame, is_second_arf,
                           TF_BLOCK_SIZE, &sf, strength, noise_levels);
  }

  if (is_key_frame) {  // Key frame should always be filtered.
    return 1;
  }

  // Decide whether the filtered ARF can be shown directly, based on mean and
  // standard deviation of the per-block filtering differences.
  if ((show_existing_arf != NULL && cpi->sf.hl_sf.adaptive_overlay_encoding) ||
      is_second_arf) {
    const int frame_height = frames[filter_frame_idx]->y_crop_height;
    const int frame_width = frames[filter_frame_idx]->y_crop_width;
    const int block_height = block_size_high[TF_BLOCK_SIZE];
    const int block_width = block_size_wide[TF_BLOCK_SIZE];
    const int mb_rows = get_num_blocks(frame_height, block_height);
    const int mb_cols = get_num_blocks(frame_width, block_width);
    const int num_mbs = AOMMAX(1, mb_rows * mb_cols);
    const float mean = (float)diff.sum / num_mbs;
    const float std = (float)sqrt((float)diff.sse / num_mbs - mean * mean);

    aom_clear_system_state();
    // TODO(yunqing): This can be combined with TPL q calculation later.
    cpi->rc.base_frame_target = gf_group->bit_allocation[group_idx];
    av1_set_target_rate(cpi, cpi->common.width, cpi->common.height);
    int top_index = 0;
    int bottom_index = 0;
    const int q = av1_rc_pick_q_and_bounds(cpi, &cpi->rc, cpi->oxcf.width,
                                           cpi->oxcf.height, group_idx,
                                           &bottom_index, &top_index);
    // Threshold scales with the quantizer step of the q the frame would use.
    const int ac_q = av1_ac_quant_QTX(q, 0, bit_depth);
    const float threshold = 0.7f * ac_q * ac_q;

    if (!is_second_arf) {
      *show_existing_arf = 0;
      if (mean < threshold && std < mean * 1.2) {
        *show_existing_arf = 1;
      }
      cpi->common.showable_frame |= *show_existing_arf;
    } else {
      // Use source frame if the filtered frame becomes very different.
      if (!(mean < threshold && std < mean * 1.2)) {
        return 0;
      }
    }
  }

  return 1;
}
1339