1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "av1/encoder/tune_vmaf.h"
13
14 #include "aom_dsp/psnr.h"
15 #include "av1/encoder/extend.h"
16 #include "av1/encoder/rdopt.h"
17 #include "config/aom_scale_rtcd.h"
18
// Reference VMAF score used in this file: cal_approx_vmaf() subtracts it from
// a measured score and av1_set_mb_vmaf_rdmult_scaling() subtracts a measured
// score from it.
// NOTE(review): presumably the score the VMAF model reports for an
// undistorted frame -- confirm against the model in use.
static const double kBaselineVmaf = 97.42773;
20
// Looks up the value recorded for `layer`. Negative entries are skipped by
// walking down towards layer 0; the entry finally selected is returned, or
// 0.0 when that entry is not positive.
static double get_layer_value(const double *array, int layer) {
  int idx = layer;
  while (idx > 0 && array[idx] < 0.0) {
    --idx;
  }
  const double value = array[idx];
  return value > 0.0 ? value : 0.0;
}
25
// Runs a full-pixel NSTEP motion search for one luma block of `src` against
// the co-located block of `ref`.
//
// `*ref_mv` supplies the starting vector and receives the search result.
// The plane-0 source and prediction buffers of cpi->td.mb are redirected for
// the duration of the search and put back before returning.
static void motion_search(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *src,
                          const YV12_BUFFER_CONFIG *ref,
                          const BLOCK_SIZE block_size, const int mb_row,
                          const int mb_col, FULLPEL_MV *ref_mv) {
  // Only the Y plane takes part in the search.
  const int stride = src->y_stride;
  assert(stride == ref->y_stride);
  const int blk_h = block_size_high[block_size];
  const int blk_w = block_size_wide[block_size];
  const int blk_offset = mb_row * blk_h * stride + mb_col * blk_w;

  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // Remember the buffers that are about to be overwritten.
  const struct buf_2d saved_src = x->plane[0].src;
  const struct buf_2d saved_pre = xd->plane[0].pre[0];

  // Point the macroblock at the block under test.
  x->plane[0].src.buf = src->y_buffer + blk_offset;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref->y_buffer + blk_offset;
  xd->plane[0].pre[0].stride = stride;

  // Search configuration.
  const search_site_config *const site_cfg =
      cpi->mv_search_params.search_site_cfg[SS_CFG_FPF];
  const int step =
      av1_init_search_range(AOMMAX(src->y_crop_width, src->y_crop_height));
  // Zero vector: the baseline position for rate distortion comparison.
  const MV zero_mv = kZeroMv;

  FULLPEL_MOTION_SEARCH_PARAMS ms_params;
  av1_make_default_fullpel_ms_params(&ms_params, cpi, x, block_size, &zero_mv,
                                     site_cfg,
                                     /*fine_search_interval=*/0);
  av1_set_mv_search_method(&ms_params, site_cfg, NSTEP);

  // Intermediate results of the search; not consumed here.
  int cost_list[5];

  // Full search on the entire block only.
  av1_full_pixel_search(*ref_mv, &ms_params, step,
                        cond_cost_list(cpi, cost_list), ref_mv, NULL);

  // Put the caller's buffers back.
  x->plane[0].src = saved_src;
  xd->plane[0].pre[0] = saved_pre;
}
76
// Computes the variance between one luma block of `src` and the block of
// `ref` displaced by `ref_mv`, using the variance function registered for
// `block_size`. The sum of squared errors is written to `*sse`.
static unsigned int residual_variance(const AV1_COMP *cpi,
                                      const YV12_BUFFER_CONFIG *src,
                                      const YV12_BUFFER_CONFIG *ref,
                                      const BLOCK_SIZE block_size,
                                      const int mb_row, const int mb_col,
                                      FULLPEL_MV ref_mv, unsigned int *sse) {
  const int stride = src->y_stride;
  assert(stride == ref->y_stride);

  const int blk_h = block_size_high[block_size];
  const int blk_w = block_size_wide[block_size];
  const int blk_offset = mb_row * blk_h * stride + mb_col * blk_w;
  // Displacement of the reference block, in samples.
  const int mv_offset = ref_mv.row * stride + ref_mv.col;

  const uint8_t *const src_blk = src->y_buffer + blk_offset;
  const uint8_t *const ref_blk = ref->y_buffer + blk_offset + mv_offset;

  return cpi->ppi->fn_ptr[block_size].vf(ref_blk, stride, src_blk, stride,
                                         sse);
}
94
frame_average_variance(const AV1_COMP * const cpi,const YV12_BUFFER_CONFIG * const frame)95 static double frame_average_variance(const AV1_COMP *const cpi,
96 const YV12_BUFFER_CONFIG *const frame) {
97 const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
98 const uint8_t *const y_buffer = frame->y_buffer;
99 const int y_stride = frame->y_stride;
100 const BLOCK_SIZE block_size = BLOCK_64X64;
101
102 const int block_w = mi_size_wide[block_size] * 4;
103 const int block_h = mi_size_high[block_size] * 4;
104 int row, col;
105 double var = 0.0, var_count = 0.0;
106 const int use_hbd = frame->flags & YV12_FLAG_HIGHBITDEPTH;
107
108 // Loop through each block.
109 for (row = 0; row < frame->y_height / block_h; ++row) {
110 for (col = 0; col < frame->y_width / block_w; ++col) {
111 struct buf_2d buf;
112 const int row_offset_y = row * block_h;
113 const int col_offset_y = col * block_w;
114
115 buf.buf = (uint8_t *)y_buffer + row_offset_y * y_stride + col_offset_y;
116 buf.stride = y_stride;
117
118 var += av1_get_perpixel_variance(cpi, xd, &buf, block_size, AOM_PLANE_Y,
119 use_hbd);
120 var_count += 1.0;
121 }
122 }
123 var /= var_count;
124 return var;
125 }
126
// Returns the average, over all 16x16 luma blocks, of the variance of the
// residual between `src` and `ref`.
//
// When `ref` is NULL the plain frame variance of `src` is returned instead.
// When `mvs` is NULL a motion search is run for every block and the vectors
// are kept in a scratch array owned by this function; otherwise `mvs` must
// hold one vector per block (row-major, mb_cols entries per row) and is used
// as is.
//
// The prediction buffers and mode-info pointer of cpi->td.mb.e_mbd are saved
// on entry and restored before returning.
static double residual_frame_average_variance(AV1_COMP *cpi,
                                              const YV12_BUFFER_CONFIG *src,
                                              const YV12_BUFFER_CONFIG *ref,
                                              FULLPEL_MV *mvs) {
  if (ref == NULL) return frame_average_variance(cpi, src);
  const BLOCK_SIZE block_size = BLOCK_16X16;
  const int frame_height = src->y_height;
  const int frame_width = src->y_width;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_rows = (frame_height + mb_height - 1) / mb_height;
  const int mb_cols = (frame_width + mb_width - 1) / mb_width;
  const int num_planes = av1_num_planes(&cpi->common);
  const int mi_h = mi_size_high_log2[block_size];
  const int mi_w = mi_size_wide_log2[block_size];
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Save input state.
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  uint8_t *input_buffer[MAX_MB_PLANE];
  for (int i = 0; i < num_planes; i++) {
    input_buffer[i] = mbd->plane[i].pre[0].buf;
  }
  MB_MODE_INFO **input_mb_mode_info = mbd->mi;

  // Without caller-supplied vectors, allocate scratch storage and search.
  bool do_motion_search = false;
  if (mvs == NULL) {
    do_motion_search = true;
    CHECK_MEM_ERROR(&cpi->common, mvs,
                    (FULLPEL_MV *)aom_calloc(mb_rows * mb_cols, sizeof(*mvs)));
  }

  // 64-bit accumulator so the sum over all blocks cannot wrap.
  uint64_t variance = 0;
  for (int mb_row = 0; mb_row < mb_rows; mb_row++) {
    av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);
    for (int mb_col = 0; mb_col < mb_cols; mb_col++) {
      av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                            (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
                            cpi->oxcf.border_in_pixels);
      FULLPEL_MV *ref_mv = &mvs[mb_col + mb_row * mb_cols];
      if (do_motion_search) {
        motion_search(cpi, src, ref, block_size, mb_row, mb_col, ref_mv);
      }
      unsigned int mv_sse;
      const unsigned int blk_var = residual_variance(
          cpi, src, ref, block_size, mb_row, mb_col, *ref_mv, &mv_sse);
      variance += blk_var;
    }
  }

  // Restore input state.
  for (int i = 0; i < num_planes; i++) {
    mbd->plane[i].pre[0].buf = input_buffer[i];
  }
  mbd->mi = input_mb_mode_info;

  // The scratch vectors are invisible to the caller (the pointer was passed
  // by value), so they must be released here.
  if (do_motion_search) aom_free(mvs);

  return (double)variance / (double)(mb_rows * mb_cols);
}
188
189 // TODO(sdeng): Add the SIMD implementation.
highbd_unsharp_rect(const uint16_t * source,int source_stride,const uint16_t * blurred,int blurred_stride,uint16_t * dst,int dst_stride,int w,int h,double amount,int bit_depth)190 static AOM_INLINE void highbd_unsharp_rect(const uint16_t *source,
191 int source_stride,
192 const uint16_t *blurred,
193 int blurred_stride, uint16_t *dst,
194 int dst_stride, int w, int h,
195 double amount, int bit_depth) {
196 const int max_value = (1 << bit_depth) - 1;
197 for (int i = 0; i < h; ++i) {
198 for (int j = 0; j < w; ++j) {
199 const double val =
200 (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
201 dst[j] = (uint16_t)clamp((int)(val + 0.5), 0, max_value);
202 }
203 source += source_stride;
204 blurred += blurred_stride;
205 dst += dst_stride;
206 }
207 }
208
unsharp_rect(const uint8_t * source,int source_stride,const uint8_t * blurred,int blurred_stride,uint8_t * dst,int dst_stride,int w,int h,double amount)209 static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride,
210 const uint8_t *blurred, int blurred_stride,
211 uint8_t *dst, int dst_stride, int w, int h,
212 double amount) {
213 for (int i = 0; i < h; ++i) {
214 for (int j = 0; j < w; ++j) {
215 const double val =
216 (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
217 dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
218 }
219 source += source_stride;
220 blurred += blurred_stride;
221 dst += dst_stride;
222 }
223 }
224
unsharp(const AV1_COMP * const cpi,const YV12_BUFFER_CONFIG * source,const YV12_BUFFER_CONFIG * blurred,const YV12_BUFFER_CONFIG * dst,double amount)225 static AOM_INLINE void unsharp(const AV1_COMP *const cpi,
226 const YV12_BUFFER_CONFIG *source,
227 const YV12_BUFFER_CONFIG *blurred,
228 const YV12_BUFFER_CONFIG *dst, double amount) {
229 const int bit_depth = cpi->td.mb.e_mbd.bd;
230 if (cpi->common.seq_params->use_highbitdepth) {
231 assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
232 assert(blurred->flags & YV12_FLAG_HIGHBITDEPTH);
233 assert(dst->flags & YV12_FLAG_HIGHBITDEPTH);
234 highbd_unsharp_rect(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
235 CONVERT_TO_SHORTPTR(blurred->y_buffer),
236 blurred->y_stride, CONVERT_TO_SHORTPTR(dst->y_buffer),
237 dst->y_stride, source->y_width, source->y_height,
238 amount, bit_depth);
239 } else {
240 unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer,
241 blurred->y_stride, dst->y_buffer, dst->y_stride,
242 source->y_width, source->y_height, amount);
243 }
244 }
245
// 8-tap Gaussian convolution filter with sigma = 1.0. The taps sum to 128
// (0 + 8 + 30 + 52 + 30 + 8 + 0 + 0) and every coefficient must be even.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 0, 8, 30, 52,
                                                               30, 8, 0, 0 };
gaussian_blur(const int bit_depth,const YV12_BUFFER_CONFIG * source,const YV12_BUFFER_CONFIG * dst)250 static AOM_INLINE void gaussian_blur(const int bit_depth,
251 const YV12_BUFFER_CONFIG *source,
252 const YV12_BUFFER_CONFIG *dst) {
253 const int block_size = BLOCK_128X128;
254 const int block_w = mi_size_wide[block_size] * 4;
255 const int block_h = mi_size_high[block_size] * 4;
256 const int num_cols = (source->y_width + block_w - 1) / block_w;
257 const int num_rows = (source->y_height + block_h - 1) / block_h;
258 int row, col;
259
260 ConvolveParams conv_params = get_conv_params(0, 0, bit_depth);
261 InterpFilterParams filter = { .filter_ptr = gauss_filter,
262 .taps = 8,
263 .interp_filter = EIGHTTAP_REGULAR };
264
265 for (row = 0; row < num_rows; ++row) {
266 for (col = 0; col < num_cols; ++col) {
267 const int row_offset_y = row * block_h;
268 const int col_offset_y = col * block_w;
269
270 uint8_t *src_buf =
271 source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
272 uint8_t *dst_buf =
273 dst->y_buffer + row_offset_y * dst->y_stride + col_offset_y;
274
275 if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
276 av1_highbd_convolve_2d_sr(
277 CONVERT_TO_SHORTPTR(src_buf), source->y_stride,
278 CONVERT_TO_SHORTPTR(dst_buf), dst->y_stride, block_w, block_h,
279 &filter, &filter, 0, 0, &conv_params, bit_depth);
280 } else {
281 av1_convolve_2d_sr(src_buf, source->y_stride, dst_buf, dst->y_stride,
282 block_w, block_h, &filter, &filter, 0, 0,
283 &conv_params);
284 }
285 }
286 }
287 }
288
cal_approx_vmaf(const AV1_COMP * const cpi,double source_variance,YV12_BUFFER_CONFIG * const source,YV12_BUFFER_CONFIG * const sharpened)289 static AOM_INLINE double cal_approx_vmaf(const AV1_COMP *const cpi,
290 double source_variance,
291 YV12_BUFFER_CONFIG *const source,
292 YV12_BUFFER_CONFIG *const sharpened) {
293 const int bit_depth = cpi->td.mb.e_mbd.bd;
294 const bool cal_vmaf_neg =
295 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
296 double new_vmaf;
297
298 aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth,
299 cal_vmaf_neg, &new_vmaf);
300
301 const double sharpened_var = frame_average_variance(cpi, sharpened);
302 return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf);
303 }
304
// Hill-climbs the unsharp amount in steps of `step_size` (which may be
// negative), starting from `unsharp_amount_start` whose score is `best_vmaf`.
//
// Each iteration moves one step, sharpens `source` into `sharpened` and
// scores it. The walk stops when the amount leaves [0, max_amount], when the
// score stops improving, or after `max_loop_count` evaluations. If the last
// step did not improve the score it is undone. The result is clamped to
// [0, max_amount].
static double find_best_frame_unsharp_amount_loop(
    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
    YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
    double best_vmaf, const double baseline_variance,
    const double unsharp_amount_start, const double step_size,
    const int max_loop_count, const double max_amount) {
  const double min_amount = 0.0;
  double prev_score = best_vmaf;
  double cur_score = best_vmaf;
  double amount = unsharp_amount_start;
  int evaluations = 0;

  for (;;) {
    prev_score = cur_score;
    amount += step_size;
    // Stepped outside the permitted range: nothing further to evaluate.
    if (amount > max_amount || amount < min_amount) break;

    unsharp(cpi, source, blurred, sharpened, amount);
    cur_score = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
    ++evaluations;

    if (!(cur_score > prev_score && evaluations < max_loop_count)) break;
  }

  // Back off one step unless the final step was an improvement.
  if (!(cur_score > prev_score)) {
    amount = amount - step_size;
  }
  return AOMMIN(max_amount, AOMMAX(amount, min_amount));
}
328
// Searches for the unsharp amount that maximises the approximate VMAF gain
// of `source` when sharpened with `blurred` as the low-pass image.
//
// - If `unsharp_amount_start` <= `step_size`, the search climbs upward from
//   0.0 with an initial score of 0.0.
// - Otherwise the scores at (start - step) and (start) are compared: if they
//   differ by less than 0.01 the lower amount is returned, else the search
//   continues in whichever direction scored higher.
//
// A temporary frame the size of `source` is allocated for the sharpened
// candidates and released before returning. Allocation failure is reported
// through aom_internal_error().
static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi,
                                             YV12_BUFFER_CONFIG *const source,
                                             YV12_BUFFER_CONFIG *const blurred,
                                             const double unsharp_amount_start,
                                             const double step_size,
                                             const int max_loop_count,
                                             const double max_filter_amount) {
  const AV1_COMMON *const cm = &cpi->common;
  const int width = source->y_width;
  const int height = source->y_height;
  YV12_BUFFER_CONFIG sharpened;
  memset(&sharpened, 0, sizeof(sharpened));
  if (aom_alloc_frame_buffer(
          &sharpened, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0)) {
    aom_free_frame_buffer(&sharpened);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us: keep the starting amount
    // rather than touching the unallocated buffer.
    return unsharp_amount_start;
  }

  const double baseline_variance = frame_average_variance(cpi, source);
  double unsharp_amount;
  if (unsharp_amount_start <= step_size) {
    unsharp_amount = find_best_frame_unsharp_amount_loop(
        cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
        step_size, max_loop_count, max_filter_amount);
  } else {
    const double a0 = unsharp_amount_start - step_size;
    const double a1 = unsharp_amount_start;
    unsharp(cpi, source, blurred, &sharpened, a0);
    const double v0 =
        cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
    unsharp(cpi, source, blurred, &sharpened, a1);
    const double v1 =
        cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
    if (fabs(v0 - v1) < 0.01) {
      // Scores are indistinguishable: prefer the smaller amount.
      unsharp_amount = a0;
    } else if (v0 > v1) {
      // Less sharpening scored higher: walk downward from a0.
      unsharp_amount = find_best_frame_unsharp_amount_loop(
          cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
          -step_size, max_loop_count, max_filter_amount);
    } else {
      // More sharpening scored higher: walk upward from a1.
      unsharp_amount = find_best_frame_unsharp_amount_loop(
          cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
          step_size, max_loop_count, max_filter_amount);
    }
  }

  aom_free_frame_buffer(&sharpened);
  return unsharp_amount;
}
375
// Sharpens the luma plane of `source` in place using the unsharp amount
// previously recorded for the current frame's layer depth (or the nearest
// lower layer with a non-negative entry). Does nothing when that amount is
// not positive. Allocation failure of the temporary blurred frame is reported
// through aom_internal_error().
void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
                                YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int width = source->y_width;
  const int height = source->y_height;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double best_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  if (best_frame_unsharp_amount <= 0.0) return;

  YV12_BUFFER_CONFIG blurred;
  memset(&blurred, 0, sizeof(blurred));
  if (aom_alloc_frame_buffer(
          &blurred, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0)) {
    aom_free_frame_buffer(&blurred);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us: leave `source` untouched.
    return;
  }

  gaussian_blur(bit_depth, source, &blurred);
  unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
  aom_free_frame_buffer(&blurred);
}
402
// Frame-level VMAF preprocessing: blurs a border-extended copy of `source`,
// searches for the best frame-wide unsharp amount (step 0.05, at most 20
// evaluations, cap 1.01) starting from the amount recorded for this layer
// depth, records the result for that layer and sharpens the luma plane of
// `source` in place. Allocation failure of the temporary frames is reported
// through aom_internal_error().
void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
                                  YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int width = source->y_width;
  const int height = source->y_height;

  YV12_BUFFER_CONFIG source_extended, blurred;
  memset(&source_extended, 0, sizeof(source_extended));
  memset(&blurred, 0, sizeof(blurred));
  const int alloc_failed =
      aom_alloc_frame_buffer(
          &source_extended, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0) ||
      aom_alloc_frame_buffer(
          &blurred, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0);
  if (alloc_failed) {
    // Both structs were zeroed above, so freeing either one is safe here.
    aom_free_frame_buffer(&source_extended);
    aom_free_frame_buffer(&blurred);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us: leave `source` untouched.
    return;
  }

  // The extended copy is only needed as input to the blur.
  av1_copy_and_extend_frame(source, &source_extended);
  gaussian_blur(bit_depth, &source_extended, &blurred);
  aom_free_frame_buffer(&source_extended);

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double last_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
      cpi, source, &blurred, last_frame_unsharp_amount, 0.05, 20, 1.01);

  // Remember the result as the starting point for later frames of this layer.
  cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
      best_frame_unsharp_amount;

  unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
  aom_free_frame_buffer(&blurred);
}
441
// Copies a valid_w x valid_h region of 8-bit samples from `src` into the
// top-left corner of a block_w x block_h destination and zero-fills the rest.
static void vmaf_copy_zero_padded_block(const uint8_t *src, int src_stride,
                                        uint8_t *dst, int dst_stride,
                                        int valid_w, int valid_h, int block_w,
                                        int block_h) {
  for (int i = 0; i < block_h; ++i) {
    uint8_t *const dst_row = dst + i * dst_stride;
    if (i < valid_h) {
      memcpy(dst_row, src + i * src_stride, (size_t)valid_w);
      memset(dst_row + valid_w, 0, (size_t)(block_w - valid_w));
    } else {
      memset(dst_row, 0, (size_t)block_w);
    }
  }
}

// 16-bit counterpart of vmaf_copy_zero_padded_block().
static void vmaf_highbd_copy_zero_padded_block(const uint16_t *src,
                                               int src_stride, uint16_t *dst,
                                               int dst_stride, int valid_w,
                                               int valid_h, int block_w,
                                               int block_h) {
  for (int i = 0; i < block_h; ++i) {
    uint16_t *const dst_row = dst + i * dst_stride;
    if (i < valid_h) {
      memcpy(dst_row, src + i * src_stride, (size_t)valid_w * sizeof(*dst));
      memset(dst_row + valid_w, 0,
             (size_t)(block_w - valid_w) * sizeof(*dst));
    } else {
      memset(dst_row, 0, (size_t)block_w * sizeof(*dst));
    }
  }
}

// Block-level VMAF preprocessing.
//
// 1. Blurs a border-extended copy of `source`.
// 2. Finds the best frame-wide unsharp amount (step 0.05, at most 20
//    evaluations, cap 1.01) and records it for the current layer depth.
// 3. For every 64x64 block, copies the block (zero-padded at the frame edges)
//    out of the source and blurred frames and refines the amount for that
//    block alone (step 0.1, at most 3 evaluations, cap 1.5), starting from
//    the frame-wide amount.
// 4. Sharpens each block of `source` in place with its own amount.
//
// All temporary buffers are allocated up front; if any allocation fails they
// are all released and the failure is reported through aom_internal_error().
void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
                                YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int width = source->y_width;
  const int height = source->y_height;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int ss_x = source->subsampling_x;
  const int ss_y = source->subsampling_y;
  const int use_hbd = cm->seq_params->use_highbitdepth;
  const int border = cpi->oxcf.border_in_pixels;
  const int byte_alignment = cm->features.byte_alignment;

  const int block_size = BLOCK_64X64;
  const int block_w = mi_size_wide[block_size] * 4;
  const int block_h = mi_size_high[block_size] * 4;
  const int num_cols = (width + block_w - 1) / block_w;
  const int num_rows = (height + block_h - 1) / block_h;

  YV12_BUFFER_CONFIG source_extended, blurred, source_block, blurred_block;
  memset(&source_extended, 0, sizeof(source_extended));
  memset(&blurred, 0, sizeof(blurred));
  memset(&source_block, 0, sizeof(source_block));
  memset(&blurred_block, 0, sizeof(blurred_block));

  double *best_unsharp_amounts =
      aom_calloc(num_cols * num_rows, sizeof(*best_unsharp_amounts));
  const int alloc_failed =
      best_unsharp_amounts == NULL ||
      aom_alloc_frame_buffer(&blurred, width, height, ss_x, ss_y, use_hbd,
                             border, byte_alignment, 0) ||
      aom_alloc_frame_buffer(&source_extended, width, height, ss_x, ss_y,
                             use_hbd, border, byte_alignment, 0) ||
      aom_alloc_frame_buffer(&source_block, block_w, block_h, ss_x, ss_y,
                             use_hbd, border, byte_alignment, 0) ||
      aom_alloc_frame_buffer(&blurred_block, block_w, block_h, ss_x, ss_y,
                             use_hbd, border, byte_alignment, 0);
  if (alloc_failed) {
    // Release whatever was obtained before reporting, so nothing leaks if the
    // error handler does not return here.
    aom_free_frame_buffer(&source_extended);
    aom_free_frame_buffer(&blurred);
    aom_free_frame_buffer(&source_block);
    aom_free_frame_buffer(&blurred_block);
    aom_free(best_unsharp_amounts);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us: leave `source` untouched.
    return;
  }

  // The extended copy is only needed as input to the blur.
  av1_copy_and_extend_frame(source, &source_extended);
  gaussian_blur(bit_depth, &source_extended, &blurred);
  aom_free_frame_buffer(&source_extended);

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double last_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
      cpi, source, &blurred, last_frame_unsharp_amount, 0.05, 20, 1.01);

  cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
      best_frame_unsharp_amount;

  // Refine the amount for each block on zero-padded block copies.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset_y = row * block_h;
      const int col_offset_y = col * block_w;
      // Portion of the block that lies inside the frame.
      const int block_width = AOMMIN(width - col_offset_y, block_w);
      const int block_height = AOMMIN(height - row_offset_y, block_h);
      const int index = col + row * num_cols;
      const int src_offset = row_offset_y * source->y_stride + col_offset_y;
      const int blurred_offset =
          row_offset_y * blurred.y_stride + col_offset_y;

      if (use_hbd) {
        assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
        assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
        vmaf_highbd_copy_zero_padded_block(
            CONVERT_TO_SHORTPTR(source->y_buffer) + src_offset,
            source->y_stride, CONVERT_TO_SHORTPTR(source_block.y_buffer),
            source_block.y_stride, block_width, block_height, block_w,
            block_h);
        vmaf_highbd_copy_zero_padded_block(
            CONVERT_TO_SHORTPTR(blurred.y_buffer) + blurred_offset,
            blurred.y_stride, CONVERT_TO_SHORTPTR(blurred_block.y_buffer),
            blurred_block.y_stride, block_width, block_height, block_w,
            block_h);
      } else {
        vmaf_copy_zero_padded_block(
            source->y_buffer + src_offset, source->y_stride,
            source_block.y_buffer, source_block.y_stride, block_width,
            block_height, block_w, block_h);
        vmaf_copy_zero_padded_block(
            blurred.y_buffer + blurred_offset, blurred.y_stride,
            blurred_block.y_buffer, blurred_block.y_stride, block_width,
            block_height, block_w, block_h);
      }

      best_unsharp_amounts[index] = find_best_frame_unsharp_amount(
          cpi, &source_block, &blurred_block, best_frame_unsharp_amount, 0.1, 3,
          1.5);
    }
  }

  // Apply each block's amount to the source frame in place.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset_y = row * block_h;
      const int col_offset_y = col * block_w;
      const int block_width = AOMMIN(width - col_offset_y, block_w);
      const int block_height = AOMMIN(height - row_offset_y, block_h);
      const int index = col + row * num_cols;
      const int src_offset = row_offset_y * source->y_stride + col_offset_y;
      const int blurred_offset =
          row_offset_y * blurred.y_stride + col_offset_y;

      if (use_hbd) {
        assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
        assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
        uint16_t *const src_buf =
            CONVERT_TO_SHORTPTR(source->y_buffer) + src_offset;
        const uint16_t *const blurred_buf =
            CONVERT_TO_SHORTPTR(blurred.y_buffer) + blurred_offset;
        highbd_unsharp_rect(src_buf, source->y_stride, blurred_buf,
                            blurred.y_stride, src_buf, source->y_stride,
                            block_width, block_height,
                            best_unsharp_amounts[index], bit_depth);
      } else {
        uint8_t *const src_buf = source->y_buffer + src_offset;
        const uint8_t *const blurred_buf = blurred.y_buffer + blurred_offset;
        unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                     src_buf, source->y_stride, block_width, block_height,
                     best_unsharp_amounts[index]);
      }
    }
  }

  aom_free_frame_buffer(&source_block);
  aom_free_frame_buffer(&blurred_block);
  aom_free_frame_buffer(&blurred);
  aom_free(best_unsharp_amounts);
}
607
// Fills cpi->vmaf_info.rdmult_scaling_factors with one weight per 32x32
// block of the half-resolution source.
//
// For each block, the SSE between the resized source and its Gaussian-blurred
// version is measured, and a VMAF score is collected for a reconstruction
// that equals the resized source everywhere except in that block, where the
// blurred pixels are substituted. The weight is derived from the ratio of the
// block's mean squared error to its VMAF drop, then passed through a fitted
// curve.
//
// All temporary frame buffers, including `recon`, are released before
// returning. Allocation failures are reported through aom_internal_error().
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  const int y_width = cpi->source->y_width;
  const int y_height = cpi->source->y_height;
  const int resized_block_size = BLOCK_32X32;
  const int resize_factor = 2;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int ss_x = cpi->source->subsampling_x;
  const int ss_y = cpi->source->subsampling_y;
  const int use_hbd = cm->seq_params->use_highbitdepth;
  const int border = cpi->oxcf.border_in_pixels;
  const int byte_alignment = cm->features.byte_alignment;

  YV12_BUFFER_CONFIG resized_source, blurred, recon;
  memset(&resized_source, 0, sizeof(resized_source));
  memset(&blurred, 0, sizeof(blurred));
  memset(&recon, 0, sizeof(recon));

  if (aom_alloc_frame_buffer(&resized_source, y_width / resize_factor,
                             y_height / resize_factor, ss_x, ss_y, use_hbd,
                             border, byte_alignment, 0)) {
    aom_free_frame_buffer(&resized_source);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us.
    return;
  }
  av1_resize_and_extend_frame_nonnormative(cpi->source, &resized_source,
                                           bit_depth, av1_num_planes(cm));

  const int resized_y_width = resized_source.y_width;
  const int resized_y_height = resized_source.y_height;
  const int resized_block_w = mi_size_wide[resized_block_size] * 4;
  const int resized_block_h = mi_size_high[resized_block_size] * 4;
  const int num_cols =
      (resized_y_width + resized_block_w - 1) / resized_block_w;
  const int num_rows =
      (resized_y_height + resized_block_h - 1) / resized_block_h;

  unsigned int *sses = aom_calloc(num_rows * num_cols, sizeof(*sses));
  const int alloc_failed =
      sses == NULL ||
      aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, ss_x,
                             ss_y, use_hbd, border, byte_alignment, 0) ||
      aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, ss_x,
                             ss_y, use_hbd, border, byte_alignment, 0);
  if (alloc_failed) {
    // Release whatever was obtained before reporting, so nothing leaks if the
    // error handler does not return here.
    aom_free_frame_buffer(&resized_source);
    aom_free_frame_buffer(&blurred);
    aom_free_frame_buffer(&recon);
    aom_free(sses);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // In case the error handler returns to us.
    return;
  }

  gaussian_blur(bit_depth, &resized_source, &blurred);
  // `recon` starts as an exact copy of the resized source.
  aom_yv12_copy_frame(&resized_source, &recon, 1);

  VmafContext *vmaf_context;
  const bool cal_vmaf_neg =
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
  aom_init_vmaf_context(&vmaf_context, cpi->vmaf_info.vmaf_model, cal_vmaf_neg);

  // Loop through each 'block_size' block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      const int row_offset_y = row * resized_block_h;
      const int col_offset_y = col * resized_block_w;

      uint8_t *const orig_buf = resized_source.y_buffer +
                                row_offset_y * resized_source.y_stride +
                                col_offset_y;
      uint8_t *const blurred_buf =
          blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;

      // Only the SSE output is used; the returned variance is discarded.
      cpi->ppi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride,
                                              blurred_buf, blurred.y_stride,
                                              &sses[index]);

      uint8_t *const recon_buf =
          recon.y_buffer + row_offset_y * recon.y_stride + col_offset_y;
      // Set recon buf: with amount 0.0 the unsharp kernels act as a plain
      // copy, here of the blurred block into recon.
      if (use_hbd) {
        highbd_unsharp_rect(CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
                            CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
                            CONVERT_TO_SHORTPTR(recon_buf), recon.y_stride,
                            resized_block_w, resized_block_h, 0.0, bit_depth);
      } else {
        unsharp_rect(blurred_buf, blurred.y_stride, blurred_buf,
                     blurred.y_stride, recon_buf, recon.y_stride,
                     resized_block_w, resized_block_h, 0.0);
      }

      aom_read_vmaf_image(vmaf_context, &resized_source, &recon, bit_depth,
                          index);

      // Restore recon buf: copy the original block back.
      if (use_hbd) {
        highbd_unsharp_rect(
            CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
            CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
            CONVERT_TO_SHORTPTR(recon_buf), recon.y_stride, resized_block_w,
            resized_block_h, 0.0, bit_depth);
      } else {
        unsharp_rect(orig_buf, resized_source.y_stride, orig_buf,
                     resized_source.y_stride, recon_buf, recon.y_stride,
                     resized_block_w, resized_block_h, 0.0);
      }
    }
  }
  aom_flush_vmaf_context(vmaf_context);

  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      const double vmaf = aom_calc_vmaf_at_index(
          vmaf_context, cpi->vmaf_info.vmaf_model, index);
      const double dvmaf = kBaselineVmaf - vmaf;

      // NOTE(review): the block SSE is divided by the whole resized-frame
      // area, not the block area -- confirm this matches the fitted model.
      const double mse =
          (double)sses[index] / (double)(resized_y_width * resized_y_height);
      double weight;
      const double eps = 0.01 / (num_rows * num_cols);
      if (dvmaf < eps || mse < eps) {
        weight = 1.0;
      } else {
        weight = mse / dvmaf;
      }

      // Normalize it with a data fitted model.
      weight = 6.0 * (1.0 - exp(-0.05 * weight)) + 0.8;
      cpi->vmaf_info.rdmult_scaling_factors[index] = weight;
    }
  }

  aom_free_frame_buffer(&resized_source);
  aom_free_frame_buffer(&blurred);
  aom_free_frame_buffer(&recon);
  aom_close_vmaf_context(vmaf_context);
  aom_free(sses);
}
739
// Scales `*rdmult` by the geometric mean of the VMAF scaling factors of the
// 64x64 units covered by the block at (mi_row, mi_col) of size `bsize`,
// clamps the result to be non-negative and updates x->errorperbit from it.
// If no unit is covered, `*rdmult` is left unscaled.
void av1_set_vmaf_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                         const BLOCK_SIZE bsize, const int mi_row,
                         const int mi_col, int *const rdmult) {
  const AV1_COMMON *const cm = &cpi->common;

  const int bsize_base = BLOCK_64X64;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

  // Rows are measured in units of the base block height and columns in units
  // of its width.
  const int row_start = mi_row / num_mi_h;
  const int col_start = mi_col / num_mi_w;
  const int row_end = AOMMIN(num_rows, row_start + num_brows);
  const int col_end = AOMMIN(num_cols, col_start + num_bcols);

  double num_of_mi = 0.0;
  double log_sum = 0.0;
  for (int row = row_start; row < row_end; ++row) {
    for (int col = col_start; col < col_end; ++col) {
      const int index = row * num_cols + col;
      log_sum += log(cpi->vmaf_info.rdmult_scaling_factors[index]);
      num_of_mi += 1.0;
    }
  }

  // Skip the scaling rather than compute exp(0 / 0) when nothing was visited.
  if (num_of_mi > 0.0) {
    const double geom_mean_of_scale = exp(log_sum / num_of_mi);
    *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
  }
  *rdmult = AOMMAX(*rdmult, 0);
  av1_set_error_per_bit(&x->errorperbit, *rdmult);
}
771
772 // TODO(sdeng): replace them with the SIMD versions.
// Returns the mean absolute difference between two w x h planes of 16-bit
// samples. `src_stride` and `ref_stride` are the distances, in samples,
// between the starts of consecutive rows of `src` and `ref`.
static AOM_INLINE double highbd_image_sad_c(const uint16_t *src, int src_stride,
                                            const uint16_t *ref, int ref_stride,
                                            int w, int h) {
  double total = 0.0;

  for (int y = 0; y < h; ++y) {
    const uint16_t *const src_row = &src[y * src_stride];
    const uint16_t *const ref_row = &ref[y * ref_stride];
    for (int x = 0; x < w; ++x) {
      const double a = src_row[x];
      const double b = ref_row[x];
      total += fabs(a - b);
    }
  }

  // Normalize the accumulated absolute differences by the sample count.
  return total / (double)(h * w);
}
790
// Returns the mean absolute difference between two w x h planes of 8-bit
// samples. `src_stride` and `ref_stride` are the distances, in samples,
// between the starts of consecutive rows of `src` and `ref`.
static AOM_INLINE double image_sad_c(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride, int w,
                                     int h) {
  double total = 0.0;

  for (int y = 0; y < h; ++y) {
    const uint8_t *const src_row = &src[y * src_stride];
    const uint8_t *const ref_row = &ref[y * ref_stride];
    for (int x = 0; x < w; ++x) {
      const double a = src_row[x];
      const double b = ref_row[x];
      total += fabs(a - b);
    }
  }

  // Normalize the accumulated absolute differences by the sample count.
  return total / (double)(h * w);
}
808
calc_vmaf_motion_score(const AV1_COMP * const cpi,const AV1_COMMON * const cm,const YV12_BUFFER_CONFIG * const cur,const YV12_BUFFER_CONFIG * const last,const YV12_BUFFER_CONFIG * const next)809 static double calc_vmaf_motion_score(const AV1_COMP *const cpi,
810 const AV1_COMMON *const cm,
811 const YV12_BUFFER_CONFIG *const cur,
812 const YV12_BUFFER_CONFIG *const last,
813 const YV12_BUFFER_CONFIG *const next) {
814 const int y_width = cur->y_width;
815 const int y_height = cur->y_height;
816 YV12_BUFFER_CONFIG blurred_cur, blurred_last, blurred_next;
817 const int bit_depth = cpi->td.mb.e_mbd.bd;
818 const int ss_x = cur->subsampling_x;
819 const int ss_y = cur->subsampling_y;
820
821 memset(&blurred_cur, 0, sizeof(blurred_cur));
822 memset(&blurred_last, 0, sizeof(blurred_last));
823 memset(&blurred_next, 0, sizeof(blurred_next));
824
825 aom_alloc_frame_buffer(&blurred_cur, y_width, y_height, ss_x, ss_y,
826 cm->seq_params->use_highbitdepth,
827 cpi->oxcf.border_in_pixels,
828 cm->features.byte_alignment, 0);
829 aom_alloc_frame_buffer(&blurred_last, y_width, y_height, ss_x, ss_y,
830 cm->seq_params->use_highbitdepth,
831 cpi->oxcf.border_in_pixels,
832 cm->features.byte_alignment, 0);
833 aom_alloc_frame_buffer(&blurred_next, y_width, y_height, ss_x, ss_y,
834 cm->seq_params->use_highbitdepth,
835 cpi->oxcf.border_in_pixels,
836 cm->features.byte_alignment, 0);
837
838 gaussian_blur(bit_depth, cur, &blurred_cur);
839 gaussian_blur(bit_depth, last, &blurred_last);
840 if (next) gaussian_blur(bit_depth, next, &blurred_next);
841
842 double motion1, motion2 = 65536.0;
843 if (cm->seq_params->use_highbitdepth) {
844 assert(blurred_cur.flags & YV12_FLAG_HIGHBITDEPTH);
845 assert(blurred_last.flags & YV12_FLAG_HIGHBITDEPTH);
846 const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8));
847 motion1 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer),
848 blurred_cur.y_stride,
849 CONVERT_TO_SHORTPTR(blurred_last.y_buffer),
850 blurred_last.y_stride, y_width, y_height) *
851 scale_factor;
852 if (next) {
853 assert(blurred_next.flags & YV12_FLAG_HIGHBITDEPTH);
854 motion2 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer),
855 blurred_cur.y_stride,
856 CONVERT_TO_SHORTPTR(blurred_next.y_buffer),
857 blurred_next.y_stride, y_width, y_height) *
858 scale_factor;
859 }
860 } else {
861 motion1 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride,
862 blurred_last.y_buffer, blurred_last.y_stride, y_width,
863 y_height);
864 if (next) {
865 motion2 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride,
866 blurred_next.y_buffer, blurred_next.y_stride,
867 y_width, y_height);
868 }
869 }
870
871 aom_free_frame_buffer(&blurred_cur);
872 aom_free_frame_buffer(&blurred_last);
873 aom_free_frame_buffer(&blurred_next);
874
875 return AOMMIN(motion1, motion2);
876 }
877
get_neighbor_frames(const AV1_COMP * const cpi,YV12_BUFFER_CONFIG ** last,YV12_BUFFER_CONFIG ** next)878 static AOM_INLINE void get_neighbor_frames(const AV1_COMP *const cpi,
879 YV12_BUFFER_CONFIG **last,
880 YV12_BUFFER_CONFIG **next) {
881 const AV1_COMMON *const cm = &cpi->common;
882 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
883 const int src_index =
884 cm->show_frame != 0 ? 0 : gf_group->arf_src_offset[cpi->gf_frame_index];
885 struct lookahead_entry *last_entry = av1_lookahead_peek(
886 cpi->ppi->lookahead, src_index - 1, cpi->compressor_stage);
887 struct lookahead_entry *next_entry = av1_lookahead_peek(
888 cpi->ppi->lookahead, src_index + 1, cpi->compressor_stage);
889 *next = &next_entry->img;
890 *last = cm->show_frame ? cpi->last_source : &last_entry->img;
891 }
892
893 // Calculates the new qindex from the VMAF motion score. This is based on the
894 // observation: when the motion score becomes higher, the VMAF score of the
895 // same source and distorted frames would become higher.
av1_get_vmaf_base_qindex(const AV1_COMP * const cpi,int current_qindex)896 int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
897 const AV1_COMMON *const cm = &cpi->common;
898 if (cm->current_frame.frame_number == 0 || cpi->oxcf.pass == 1) {
899 return current_qindex;
900 }
901 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
902 const int layer_depth =
903 AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
904 const double last_frame_ysse =
905 get_layer_value(cpi->vmaf_info.last_frame_ysse, layer_depth);
906 const double last_frame_vmaf =
907 get_layer_value(cpi->vmaf_info.last_frame_vmaf, layer_depth);
908 const int bit_depth = cpi->td.mb.e_mbd.bd;
909 const double approx_sse = last_frame_ysse / (double)((1 << (bit_depth - 8)) *
910 (1 << (bit_depth - 8)));
911 const double approx_dvmaf = kBaselineVmaf - last_frame_vmaf;
912 const double sse_threshold =
913 0.01 * cpi->source->y_width * cpi->source->y_height;
914 const double vmaf_threshold = 0.01;
915 if (approx_sse < sse_threshold || approx_dvmaf < vmaf_threshold) {
916 return current_qindex;
917 }
918 YV12_BUFFER_CONFIG *cur_buf = cpi->source;
919 if (cm->show_frame == 0) {
920 const int src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
921 struct lookahead_entry *cur_entry = av1_lookahead_peek(
922 cpi->ppi->lookahead, src_index, cpi->compressor_stage);
923 cur_buf = &cur_entry->img;
924 }
925 assert(cur_buf);
926
927 YV12_BUFFER_CONFIG *next_buf, *last_buf;
928 get_neighbor_frames(cpi, &last_buf, &next_buf);
929 assert(last_buf);
930
931 const double motion =
932 calc_vmaf_motion_score(cpi, cm, cur_buf, last_buf, next_buf);
933
934 // Get dVMAF through a data fitted model.
935 const double dvmaf = 26.11 * (1.0 - exp(-0.06 * motion));
936 const double dsse = dvmaf * approx_sse / approx_dvmaf;
937
938 const double beta = approx_sse / (dsse + approx_sse);
939 const int offset =
940 av1_get_deltaq_offset(cm->seq_params->bit_depth, current_qindex, beta);
941 int qindex = current_qindex + offset;
942
943 qindex = AOMMIN(qindex, MAXQ);
944 qindex = AOMMAX(qindex, MINQ);
945
946 return qindex;
947 }
948
cal_approx_score(AV1_COMP * const cpi,double src_variance,double new_variance,double src_score,YV12_BUFFER_CONFIG * const src,YV12_BUFFER_CONFIG * const recon_sharpened)949 static AOM_INLINE double cal_approx_score(
950 AV1_COMP *const cpi, double src_variance, double new_variance,
951 double src_score, YV12_BUFFER_CONFIG *const src,
952 YV12_BUFFER_CONFIG *const recon_sharpened) {
953 double score;
954 const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
955 const bool cal_vmaf_neg =
956 cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
957 aom_calc_vmaf(cpi->vmaf_info.vmaf_model, src, recon_sharpened, bit_depth,
958 cal_vmaf_neg, &score);
959 return src_variance / new_variance * (score - src_score);
960 }
961
// Line search over the unsharp amount. Starting from `unsharp_amount_start`
// (whose score is `best_score`), the amount is moved by `step_size` per
// iteration for as long as the approximate score strictly improves, for at
// most `max_loop_count` evaluations, and never outside [0, max_amount].
// `recon_sharpened` and `src_sharpened` are overwritten on every evaluation.
// Returns the last amount that improved the score, clamped to
// [0, max_amount].
static double find_best_frame_unsharp_amount_loop_neg(
    AV1_COMP *const cpi, double src_variance, double base_score,
    YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon,
    YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const src_blurred,
    YV12_BUFFER_CONFIG *const recon_blurred,
    YV12_BUFFER_CONFIG *const src_sharpened,
    YV12_BUFFER_CONFIG *const recon_sharpened, FULLPEL_MV *mvs,
    double best_score, const double unsharp_amount_start,
    const double step_size, const int max_loop_count, const double max_amount) {
  const double min_amount = 0.0;
  double amount = unsharp_amount_start;
  double candidate_score = best_score;
  int evaluations = 0;

  for (;;) {
    // Accept the previous candidate and try the next amount.
    best_score = candidate_score;
    amount += step_size;
    if (amount > max_amount || amount < min_amount) break;

    unsharp(cpi, recon, recon_blurred, recon_sharpened, amount);
    unsharp(cpi, src, src_blurred, src_sharpened, amount);
    const double new_variance =
        residual_frame_average_variance(cpi, src_sharpened, ref, mvs);
    candidate_score = cal_approx_score(cpi, src_variance, new_variance,
                                       base_score, src, recon_sharpened);

    ++evaluations;
    // Stop as soon as the score fails to improve or the budget is used up.
    if (!(candidate_score > best_score)) break;
    if (!(evaluations < max_loop_count)) break;
  }

  // The last step is undone unless it was an improvement (this also covers
  // the out-of-range exit, where candidate_score equals best_score).
  if (!(candidate_score > best_score)) amount = amount - step_size;

  return AOMMIN(max_amount, AOMMAX(amount, min_amount));
}
994
// Searches for the unsharp amount that maximizes the approximate score of
// the sharpened `recon` against `src`. The scores at `unsharp_amount_start`
// and one `step_size` above it are compared to pick a direction; the line
// search then continues upward from the higher amount, or downward from the
// start amount, via find_best_frame_unsharp_amount_loop_neg.
static double find_best_frame_unsharp_amount_neg(
    AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const src,
    YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref,
    double base_score, const double unsharp_amount_start,
    const double step_size, const int max_loop_count,
    const double max_filter_amount) {
  // mvs stays NULL for the whole function; it is only forwarded.
  FULLPEL_MV *mvs = NULL;
  const double src_variance =
      residual_frame_average_variance(cpi, src, ref, mvs);

  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;

  // Work frames, all sized like `recon`.
  YV12_BUFFER_CONFIG src_blurred, recon_blurred, src_sharpened, recon_sharpened;
  YV12_BUFFER_CONFIG *const work_bufs[4] = { &recon_sharpened, &src_sharpened,
                                             &recon_blurred, &src_blurred };
  for (int i = 0; i < 4; ++i) memset(work_bufs[i], 0, sizeof(*work_bufs[i]));
  for (int i = 0; i < 4; ++i) {
    aom_alloc_frame_buffer(work_bufs[i], recon->y_width, recon->y_height,
                           recon->subsampling_x, recon->subsampling_y,
                           cm->seq_params->use_highbitdepth,
                           cpi->oxcf.border_in_pixels,
                           cm->features.byte_alignment, 0);
  }

  gaussian_blur(bit_depth, recon, &recon_blurred);
  gaussian_blur(bit_depth, src, &src_blurred);

  // Score at the starting amount.
  unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_start);
  unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_start);
  const double variance_start =
      residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
  const double score_start = cal_approx_score(
      cpi, src_variance, variance_start, base_score, src, &recon_sharpened);

  // Score one step above the starting amount.
  const double unsharp_amount_next = unsharp_amount_start + step_size;
  unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_next);
  unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_next);
  const double variance_next =
      residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
  const double score_next = cal_approx_score(cpi, src_variance, variance_next,
                                             base_score, src, &recon_sharpened);

  // Keep climbing from the stepped amount if it scored higher; otherwise
  // search downward from the starting amount.
  const int go_up = score_next > score_start;
  const double seed_score = go_up ? score_next : score_start;
  const double seed_amount = go_up ? unsharp_amount_next : unsharp_amount_start;
  const double signed_step = go_up ? step_size : -step_size;
  const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
      cpi, src_variance, base_score, src, recon, ref, &src_blurred,
      &recon_blurred, &src_sharpened, &recon_sharpened, mvs, seed_score,
      seed_amount, signed_step, max_loop_count, max_filter_amount);

  for (int i = 0; i < 4; ++i) aom_free_frame_buffer(work_bufs[i]);
  aom_free(mvs);
  return unsharp_amount;
}
1071
// Records statistics of the frame that was just coded, indexed by its
// (capped) layer depth: the VMAF score and the luma SSE of cpi->source versus
// the reconstructed frame. When tuning is AOM_TUNE_VMAF_NEG_MAX_GAIN, the
// best unsharp amount for the layer is searched for as well and stored.
void av1_update_vmaf_curve(AV1_COMP *cpi) {
  YV12_BUFFER_CONFIG *const src_frame = cpi->source;
  YV12_BUFFER_CONFIG *const recon_frame = &cpi->common.cur_frame->buf;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const bool neg_max_gain =
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;

  double frame_vmaf;
  aom_calc_vmaf(cpi->vmaf_info.vmaf_model, src_frame, recon_frame, bit_depth,
                neg_max_gain, &frame_vmaf);
  cpi->vmaf_info.last_frame_vmaf[layer_depth] = frame_vmaf;

  double frame_ysse;
  if (cpi->common.seq_params->use_highbitdepth) {
    assert(src_frame->flags & YV12_FLAG_HIGHBITDEPTH);
    assert(recon_frame->flags & YV12_FLAG_HIGHBITDEPTH);
    frame_ysse = (double)aom_highbd_get_y_sse(src_frame, recon_frame);
  } else {
    frame_ysse = (double)aom_get_y_sse(src_frame, recon_frame);
  }
  cpi->vmaf_info.last_frame_ysse[layer_depth] = frame_ysse;

  // The unsharp amount is only tracked for the NEG max-gain tuning mode.
  if (!neg_max_gain) return;

  YV12_BUFFER_CONFIG *last, *next;
  get_neighbor_frames(cpi, &last, &next);

  // Seed the search with the amount last stored for this layer (or a
  // shallower one).
  const double start_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);
  const int max_loop_count = 5;
  cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
      find_best_frame_unsharp_amount_neg(cpi, src_frame, recon_frame, last,
                                         frame_vmaf, start_amount, 0.025,
                                         max_loop_count, 1.01);
}
1107