• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "av1/encoder/tune_vmaf.h"
13 
14 #include "aom_dsp/psnr.h"
15 #include "av1/encoder/extend.h"
16 #include "av1/encoder/rdopt.h"
17 #include "config/aom_scale_rtcd.h"
18 
// Reference VMAF score. cal_approx_vmaf() subtracts it from every measured
// score before applying the variance weighting.
// NOTE(review): the origin of this exact value is not documented in this
// file -- confirm before changing it.
static const double kBaselineVmaf = 97.42773;
20 
// Returns the value recorded for `layer`. A negative entry is treated as
// "not set": the lookup then falls back to the next lower layer, down to
// layer 0. The result is never negative; if the entry finally selected is
// still negative, 0.0 is returned.
static double get_layer_value(const double *array, int layer) {
  for (; layer > 0; --layer) {
    // Stop at the first entry that is not negative.
    if (!(array[layer] < 0.0)) break;
  }
  const double value = array[layer];
  return value > 0.0 ? value : 0.0;
}
25 
// Runs a full-pixel NSTEP motion search for a single luma block.
//
// The block is the `block_size` block at grid position (mb_row, mb_col) of
// `src`; it is searched for in `ref`. `*ref_mv` is passed in as the starting
// vector and is overwritten with the search result. While searching, the
// encoder's plane-0 source and prediction buffers are pointed at the two
// frames; both are put back before returning.
static void motion_search(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *src,
                          const YV12_BUFFER_CONFIG *ref,
                          const BLOCK_SIZE block_size, const int mb_row,
                          const int mb_col, FULLPEL_MV *ref_mv) {
  // Only the Y plane takes part in the search.
  const int stride = src->y_stride;
  assert(stride == ref->y_stride);
  const int blk_w = block_size_wide[block_size];
  const int blk_h = block_size_high[block_size];
  const int blk_offset = mb_row * blk_h * stride + mb_col * blk_w;

  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // Remember the buffers that are about to be redirected.
  const struct buf_2d saved_src = x->plane[0].src;
  const struct buf_2d saved_pre = xd->plane[0].pre[0];

  // Point the encoder at the block in both frames.
  x->plane[0].src.buf = src->y_buffer + blk_offset;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref->y_buffer + blk_offset;
  xd->plane[0].pre[0].stride = stride;

  // Search configuration.
  const search_site_config *const site_cfg =
      cpi->mv_search_params.search_site_cfg[SS_CFG_FPF];
  const int longest_side = AOMMAX(src->y_crop_width, src->y_crop_height);
  const int step_param = av1_init_search_range(longest_side);
  // Zero vector used as the baseline position for rate-distortion comparison.
  const MV zero_mv = kZeroMv;
  // Scratch space for intermediate search costs; the values are not used here.
  int cost_list[5];

  // Full-pixel search over the whole block only.
  FULLPEL_MOTION_SEARCH_PARAMS ms_params;
  av1_make_default_fullpel_ms_params(&ms_params, cpi, x, block_size, &zero_mv,
                                     site_cfg,
                                     /*fine_search_interval=*/0);
  av1_set_mv_search_method(&ms_params, site_cfg, NSTEP);
  av1_full_pixel_search(*ref_mv, &ms_params, step_param,
                        cond_cost_list(cpi, cost_list), ref_mv, NULL);

  // Put the original buffers back.
  x->plane[0].src = saved_src;
  xd->plane[0].pre[0] = saved_pre;
}
76 
// Returns the variance of the difference between one luma block of `src` and
// the block of `ref` displaced by `ref_mv`, as computed by the encoder's
// variance function for `block_size`. The block sits at grid position
// (mb_row, mb_col). The sum of squared errors reported by the variance
// function is written to `*sse`.
static unsigned int residual_variance(const AV1_COMP *cpi,
                                      const YV12_BUFFER_CONFIG *src,
                                      const YV12_BUFFER_CONFIG *ref,
                                      const BLOCK_SIZE block_size,
                                      const int mb_row, const int mb_col,
                                      FULLPEL_MV ref_mv, unsigned int *sse) {
  const int stride = src->y_stride;
  assert(stride == ref->y_stride);
  const int blk_h = block_size_high[block_size];
  const int blk_w = block_size_wide[block_size];

  // Offset of the block inside a frame, and of the motion vector on top of it.
  const int blk_offset = mb_row * blk_h * stride + mb_col * blk_w;
  const int mv_offset = ref_mv.row * stride + ref_mv.col;

  const uint8_t *const src_blk = src->y_buffer + blk_offset;
  const uint8_t *const ref_blk = ref->y_buffer + blk_offset + mv_offset;
  return cpi->ppi->fn_ptr[block_size].vf(ref_blk, stride, src_blk, stride,
                                         sse);
}
94 
frame_average_variance(const AV1_COMP * const cpi,const YV12_BUFFER_CONFIG * const frame)95 static double frame_average_variance(const AV1_COMP *const cpi,
96                                      const YV12_BUFFER_CONFIG *const frame) {
97   const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
98   const uint8_t *const y_buffer = frame->y_buffer;
99   const int y_stride = frame->y_stride;
100   const BLOCK_SIZE block_size = BLOCK_64X64;
101 
102   const int block_w = mi_size_wide[block_size] * 4;
103   const int block_h = mi_size_high[block_size] * 4;
104   int row, col;
105   double var = 0.0, var_count = 0.0;
106   const int use_hbd = frame->flags & YV12_FLAG_HIGHBITDEPTH;
107 
108   // Loop through each block.
109   for (row = 0; row < frame->y_height / block_h; ++row) {
110     for (col = 0; col < frame->y_width / block_w; ++col) {
111       struct buf_2d buf;
112       const int row_offset_y = row * block_h;
113       const int col_offset_y = col * block_w;
114 
115       buf.buf = (uint8_t *)y_buffer + row_offset_y * y_stride + col_offset_y;
116       buf.stride = y_stride;
117 
118       var += av1_get_perpixel_variance(cpi, xd, &buf, block_size, AOM_PLANE_Y,
119                                        use_hbd);
120       var_count += 1.0;
121     }
122   }
123   var /= var_count;
124   return var;
125 }
126 
// Returns the average per-block residual variance between `src` and `ref`,
// measured on 16x16 luma blocks (partial edge blocks are counted as blocks).
//
// - If `ref` is NULL there is nothing to take a residual against, and the
//   plain frame_average_variance() of `src` is returned instead.
// - If `mvs` is non-NULL it must hold one full-pixel motion vector per block
//   in raster order; those vectors are used as-is.
// - If `mvs` is NULL a temporary vector array is allocated, filled by
//   motion_search() for every block, and released before returning.
//
// The prediction buffer pointers and the mode-info pointer of the encoder's
// macroblock state are saved on entry and restored on exit.
static double residual_frame_average_variance(AV1_COMP *cpi,
                                              const YV12_BUFFER_CONFIG *src,
                                              const YV12_BUFFER_CONFIG *ref,
                                              FULLPEL_MV *mvs) {
  if (ref == NULL) return frame_average_variance(cpi, src);
  const BLOCK_SIZE block_size = BLOCK_16X16;
  const int frame_height = src->y_height;
  const int frame_width = src->y_width;
  const int mb_height = block_size_high[block_size];
  const int mb_width = block_size_wide[block_size];
  const int mb_rows = (frame_height + mb_height - 1) / mb_height;
  const int mb_cols = (frame_width + mb_width - 1) / mb_width;
  const int num_planes = av1_num_planes(&cpi->common);
  const int mi_h = mi_size_high_log2[block_size];
  const int mi_w = mi_size_wide_log2[block_size];
  assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);

  // Save input state.
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  uint8_t *input_buffer[MAX_MB_PLANE];
  for (int i = 0; i < num_planes; i++) {
    input_buffer[i] = mbd->plane[i].pre[0].buf;
  }
  MB_MODE_INFO **input_mb_mode_info = mbd->mi;

  // Without caller-supplied vectors, search for them into a scratch array
  // owned by this function.
  bool do_motion_search = false;
  if (mvs == NULL) {
    do_motion_search = true;
    CHECK_MEM_ERROR(&cpi->common, mvs,
                    (FULLPEL_MV *)aom_calloc(mb_rows * mb_cols, sizeof(*mvs)));
  }

  // 64-bit accumulator: the sum of block variances over a large frame can
  // exceed the range of unsigned int.
  uint64_t variance = 0;
  // Accumulate the motion-compensated residual variance block by block.
  for (int mb_row = 0; mb_row < mb_rows; mb_row++) {
    av1_set_mv_row_limits(&cpi->common.mi_params, &mb->mv_limits,
                          (mb_row << mi_h), (mb_height >> MI_SIZE_LOG2),
                          cpi->oxcf.border_in_pixels);
    for (int mb_col = 0; mb_col < mb_cols; mb_col++) {
      av1_set_mv_col_limits(&cpi->common.mi_params, &mb->mv_limits,
                            (mb_col << mi_w), (mb_width >> MI_SIZE_LOG2),
                            cpi->oxcf.border_in_pixels);
      FULLPEL_MV *ref_mv = &mvs[mb_col + mb_row * mb_cols];
      if (do_motion_search) {
        motion_search(cpi, src, ref, block_size, mb_row, mb_col, ref_mv);
      }
      unsigned int mv_sse;  // Required by the variance function; not used.
      const unsigned int blk_var = residual_variance(
          cpi, src, ref, block_size, mb_row, mb_col, *ref_mv, &mv_sse);
      variance += blk_var;
    }
  }

  // Restore input state
  for (int i = 0; i < num_planes; i++) {
    mbd->plane[i].pre[0].buf = input_buffer[i];
  }
  mbd->mi = input_mb_mode_info;

  // The scratch vector array is invisible to the caller (the pointer was
  // passed by value), so it has to be released here.
  if (do_motion_search) aom_free(mvs);

  return (double)variance / (double)(mb_rows * mb_cols);
}
188 
189 // TODO(sdeng): Add the SIMD implementation.
highbd_unsharp_rect(const uint16_t * source,int source_stride,const uint16_t * blurred,int blurred_stride,uint16_t * dst,int dst_stride,int w,int h,double amount,int bit_depth)190 static AOM_INLINE void highbd_unsharp_rect(const uint16_t *source,
191                                            int source_stride,
192                                            const uint16_t *blurred,
193                                            int blurred_stride, uint16_t *dst,
194                                            int dst_stride, int w, int h,
195                                            double amount, int bit_depth) {
196   const int max_value = (1 << bit_depth) - 1;
197   for (int i = 0; i < h; ++i) {
198     for (int j = 0; j < w; ++j) {
199       const double val =
200           (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
201       dst[j] = (uint16_t)clamp((int)(val + 0.5), 0, max_value);
202     }
203     source += source_stride;
204     blurred += blurred_stride;
205     dst += dst_stride;
206   }
207 }
208 
unsharp_rect(const uint8_t * source,int source_stride,const uint8_t * blurred,int blurred_stride,uint8_t * dst,int dst_stride,int w,int h,double amount)209 static AOM_INLINE void unsharp_rect(const uint8_t *source, int source_stride,
210                                     const uint8_t *blurred, int blurred_stride,
211                                     uint8_t *dst, int dst_stride, int w, int h,
212                                     double amount) {
213   for (int i = 0; i < h; ++i) {
214     for (int j = 0; j < w; ++j) {
215       const double val =
216           (double)source[j] + amount * ((double)source[j] - (double)blurred[j]);
217       dst[j] = (uint8_t)clamp((int)(val + 0.5), 0, 255);
218     }
219     source += source_stride;
220     blurred += blurred_stride;
221     dst += dst_stride;
222   }
223 }
224 
unsharp(const AV1_COMP * const cpi,const YV12_BUFFER_CONFIG * source,const YV12_BUFFER_CONFIG * blurred,const YV12_BUFFER_CONFIG * dst,double amount)225 static AOM_INLINE void unsharp(const AV1_COMP *const cpi,
226                                const YV12_BUFFER_CONFIG *source,
227                                const YV12_BUFFER_CONFIG *blurred,
228                                const YV12_BUFFER_CONFIG *dst, double amount) {
229   const int bit_depth = cpi->td.mb.e_mbd.bd;
230   if (cpi->common.seq_params->use_highbitdepth) {
231     assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
232     assert(blurred->flags & YV12_FLAG_HIGHBITDEPTH);
233     assert(dst->flags & YV12_FLAG_HIGHBITDEPTH);
234     highbd_unsharp_rect(CONVERT_TO_SHORTPTR(source->y_buffer), source->y_stride,
235                         CONVERT_TO_SHORTPTR(blurred->y_buffer),
236                         blurred->y_stride, CONVERT_TO_SHORTPTR(dst->y_buffer),
237                         dst->y_stride, source->y_width, source->y_height,
238                         amount, bit_depth);
239   } else {
240     unsharp_rect(source->y_buffer, source->y_stride, blurred->y_buffer,
241                  blurred->y_stride, dst->y_buffer, dst->y_stride,
242                  source->y_width, source->y_height, amount);
243   }
244 }
245 
246 // 8-tap Gaussian convolution filter with sigma = 1.0, sums to 128,
247 // all co-efficients must be even.
248 DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 0,  8, 30, 52,
249                                                                30, 8, 0,  0 };
gaussian_blur(const int bit_depth,const YV12_BUFFER_CONFIG * source,const YV12_BUFFER_CONFIG * dst)250 static AOM_INLINE void gaussian_blur(const int bit_depth,
251                                      const YV12_BUFFER_CONFIG *source,
252                                      const YV12_BUFFER_CONFIG *dst) {
253   const int block_size = BLOCK_128X128;
254   const int block_w = mi_size_wide[block_size] * 4;
255   const int block_h = mi_size_high[block_size] * 4;
256   const int num_cols = (source->y_width + block_w - 1) / block_w;
257   const int num_rows = (source->y_height + block_h - 1) / block_h;
258   int row, col;
259 
260   ConvolveParams conv_params = get_conv_params(0, 0, bit_depth);
261   InterpFilterParams filter = { .filter_ptr = gauss_filter,
262                                 .taps = 8,
263                                 .interp_filter = EIGHTTAP_REGULAR };
264 
265   for (row = 0; row < num_rows; ++row) {
266     for (col = 0; col < num_cols; ++col) {
267       const int row_offset_y = row * block_h;
268       const int col_offset_y = col * block_w;
269 
270       uint8_t *src_buf =
271           source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
272       uint8_t *dst_buf =
273           dst->y_buffer + row_offset_y * dst->y_stride + col_offset_y;
274 
275       if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
276         av1_highbd_convolve_2d_sr(
277             CONVERT_TO_SHORTPTR(src_buf), source->y_stride,
278             CONVERT_TO_SHORTPTR(dst_buf), dst->y_stride, block_w, block_h,
279             &filter, &filter, 0, 0, &conv_params, bit_depth);
280       } else {
281         av1_convolve_2d_sr(src_buf, source->y_stride, dst_buf, dst->y_stride,
282                            block_w, block_h, &filter, &filter, 0, 0,
283                            &conv_params);
284       }
285     }
286   }
287 }
288 
cal_approx_vmaf(const AV1_COMP * const cpi,double source_variance,YV12_BUFFER_CONFIG * const source,YV12_BUFFER_CONFIG * const sharpened)289 static AOM_INLINE double cal_approx_vmaf(const AV1_COMP *const cpi,
290                                          double source_variance,
291                                          YV12_BUFFER_CONFIG *const source,
292                                          YV12_BUFFER_CONFIG *const sharpened) {
293   const int bit_depth = cpi->td.mb.e_mbd.bd;
294   const bool cal_vmaf_neg =
295       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
296   double new_vmaf;
297 
298   aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth,
299                 cal_vmaf_neg, &new_vmaf);
300 
301   const double sharpened_var = frame_average_variance(cpi, sharpened);
302   return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf);
303 }
304 
// Hill-climbs the unsharp amount in one direction.
//
// Starting from `unsharp_amount_start` (whose score is `best_vmaf`), the
// amount is moved by `step_size` per iteration; a negative step searches
// downwards. After each move the frame is sharpened into `sharpened` and
// scored with cal_approx_vmaf(). The walk stops as soon as
//   - the next amount would leave [0, max_amount],
//   - the score did not improve on the previous one, or
//   - `max_loop_count` candidates have been scored.
// The last amount that improved the score is returned, clamped to
// [0, max_amount].
static double find_best_frame_unsharp_amount_loop(
    const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
    YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
    double best_vmaf, const double baseline_variance,
    const double unsharp_amount_start, const double step_size,
    const int max_loop_count, const double max_amount) {
  const double min_amount = 0.0;
  double amount = unsharp_amount_start;
  double prev_score = best_vmaf;
  double score = best_vmaf;
  int scored = 0;

  for (;;) {
    prev_score = score;
    amount += step_size;
    // Out of range: score == prev_score here, so the step is undone below.
    if (amount > max_amount || amount < min_amount) break;

    unsharp(cpi, source, blurred, sharpened, amount);
    score = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
    ++scored;

    if (!(score > prev_score)) break;
    if (scored >= max_loop_count) break;
  }

  // Keep the final step only if it actually improved the score.
  if (!(score > prev_score)) amount -= step_size;
  return AOMMIN(max_amount, AOMMAX(amount, min_amount));
}
328 
// Searches for the unsharp amount that maximises the approximate VMAF score
// of `source` sharpened against `blurred`.
//
// - unsharp_amount_start: amount to start from (typically the previous
//   result).
// - step_size, max_loop_count, max_filter_amount: forwarded to
//   find_best_frame_unsharp_amount_loop().
//
// When the start amount is at most one step, the search climbs upwards from
// 0. Otherwise the scores at (start - step) and at start are compared: if
// they differ by less than 0.01 the lower amount is returned directly,
// otherwise the search continues in the direction of the better score.
//
// A scratch frame the size of the luma plane of `source` is allocated for
// the duration of the call; allocation failure is reported through
// aom_internal_error().
static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi,
                                             YV12_BUFFER_CONFIG *const source,
                                             YV12_BUFFER_CONFIG *const blurred,
                                             const double unsharp_amount_start,
                                             const double step_size,
                                             const int max_loop_count,
                                             const double max_filter_amount) {
  const AV1_COMMON *const cm = &cpi->common;
  const int width = source->y_width;
  const int height = source->y_height;
  YV12_BUFFER_CONFIG sharpened;
  memset(&sharpened, 0, sizeof(sharpened));
  if (aom_alloc_frame_buffer(
          &sharpened, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0)) {
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // Defensive: only reached if aom_internal_error() returns. Without a
    // scratch frame no candidate can be scored, so keep the starting amount.
    return unsharp_amount_start;
  }

  const double baseline_variance = frame_average_variance(cpi, source);
  double unsharp_amount;
  if (unsharp_amount_start <= step_size) {
    unsharp_amount = find_best_frame_unsharp_amount_loop(
        cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
        step_size, max_loop_count, max_filter_amount);
  } else {
    // Probe one step below the start and the start itself to pick a
    // search direction.
    double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start;
    double v0, v1;
    unsharp(cpi, source, blurred, &sharpened, a0);
    v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
    unsharp(cpi, source, blurred, &sharpened, a1);
    v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
    if (fabs(v0 - v1) < 0.01) {
      // Scores are effectively equal: prefer the smaller amount.
      unsharp_amount = a0;
    } else if (v0 > v1) {
      unsharp_amount = find_best_frame_unsharp_amount_loop(
          cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
          -step_size, max_loop_count, max_filter_amount);
    } else {
      unsharp_amount = find_best_frame_unsharp_amount_loop(
          cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
          step_size, max_loop_count, max_filter_amount);
    }
  }

  aom_free_frame_buffer(&sharpened);
  return unsharp_amount;
}
375 
// Sharpens the luma plane of `source` in place, using the unsharp amount
// already stored in cpi->vmaf_info.last_frame_unsharp_amount for the current
// frame's layer depth (with get_layer_value()'s fallback to lower layers).
// No search is performed here. The function does nothing when that amount is
// not positive. Failure to allocate the temporary blurred frame is reported
// through aom_internal_error().
void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
                                YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int width = source->y_width;
  const int height = source->y_height;

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double best_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  if (best_frame_unsharp_amount <= 0.0) return;

  YV12_BUFFER_CONFIG blurred;
  memset(&blurred, 0, sizeof(blurred));
  if (aom_alloc_frame_buffer(
          &blurred, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0)) {
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    // Defensive: only reached if aom_internal_error() returns.
    return;
  }

  gaussian_blur(bit_depth, source, &blurred);
  unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
  aom_free_frame_buffer(&blurred);
}
402 
// Frame-level VMAF preprocessing. Steps:
//   1. Copy `source` into a border-extended frame and Gaussian-blur it.
//   2. Search for the best unsharp amount, starting from the amount stored
//      for the current frame's layer depth (step 0.05, at most 20
//      iterations, upper bound 1.01).
//   3. Store the result in cpi->vmaf_info.last_frame_unsharp_amount for that
//      layer depth.
//   4. Sharpen the luma plane of `source` in place with that amount.
// Failure to allocate either temporary frame is reported through
// aom_internal_error() after releasing whatever was allocated.
void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
                                  YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int width = source->y_width;
  const int height = source->y_height;

  YV12_BUFFER_CONFIG source_extended, blurred;
  memset(&source_extended, 0, sizeof(source_extended));
  memset(&blurred, 0, sizeof(blurred));
  if (aom_alloc_frame_buffer(
          &source_extended, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0) ||
      aom_alloc_frame_buffer(
          &blurred, width, height, source->subsampling_x,
          source->subsampling_y, cm->seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0)) {
    // Both structs were zeroed above, so freeing is safe whichever call
    // failed. Release before raising the error, which may not return.
    aom_free_frame_buffer(&source_extended);
    aom_free_frame_buffer(&blurred);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    return;
  }

  av1_copy_and_extend_frame(source, &source_extended);
  gaussian_blur(bit_depth, &source_extended, &blurred);
  aom_free_frame_buffer(&source_extended);

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double last_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
      cpi, source, &blurred, last_frame_unsharp_amount, 0.05, 20, 1.01);

  // Remember the result as the starting point for later frames of this layer.
  cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
      best_frame_unsharp_amount;

  unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
  aom_free_frame_buffer(&blurred);
}
441 
// Block-level VMAF preprocessing. Steps:
//   1. Copy `source` into a border-extended frame and Gaussian-blur it.
//   2. Search for the best frame-level unsharp amount (step 0.05, at most 20
//      iterations, upper bound 1.01) and store it in
//      cpi->vmaf_info.last_frame_unsharp_amount for the current layer depth.
//   3. For every 64x64 luma block, copy the block and its blurred version
//      into standalone block-sized frames (area outside the picture is
//      zero-filled) and refine the amount for that block, starting from the
//      frame-level amount (step 0.1, at most 3 iterations, upper bound 1.5).
//   4. Sharpen the luma plane of `source` in place, block by block, with the
//      per-block amounts.
// Every allocation is checked; on failure everything allocated so far is
// released and the error is reported through aom_internal_error().
void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
                                YV12_BUFFER_CONFIG *const source) {
  const AV1_COMMON *const cm = &cpi->common;
  const int width = source->y_width;
  const int height = source->y_height;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int ss_x = source->subsampling_x;
  const int ss_y = source->subsampling_y;

  YV12_BUFFER_CONFIG source_extended, blurred;
  memset(&blurred, 0, sizeof(blurred));
  memset(&source_extended, 0, sizeof(source_extended));
  if (aom_alloc_frame_buffer(&blurred, width, height, ss_x, ss_y,
                             cm->seq_params->use_highbitdepth,
                             cpi->oxcf.border_in_pixels,
                             cm->features.byte_alignment, 0) ||
      aom_alloc_frame_buffer(&source_extended, width, height, ss_x, ss_y,
                             cm->seq_params->use_highbitdepth,
                             cpi->oxcf.border_in_pixels,
                             cm->features.byte_alignment, 0)) {
    // Both structs were zeroed above, so freeing is safe whichever call
    // failed. Release before raising the error, which may not return.
    aom_free_frame_buffer(&blurred);
    aom_free_frame_buffer(&source_extended);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    return;
  }

  av1_copy_and_extend_frame(source, &source_extended);
  gaussian_blur(bit_depth, &source_extended, &blurred);
  aom_free_frame_buffer(&source_extended);

  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer_depth =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const double last_frame_unsharp_amount =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);

  const double best_frame_unsharp_amount = find_best_frame_unsharp_amount(
      cpi, source, &blurred, last_frame_unsharp_amount, 0.05, 20, 1.01);

  cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
      best_frame_unsharp_amount;

  const int block_size = BLOCK_64X64;
  const int block_w = mi_size_wide[block_size] * 4;
  const int block_h = mi_size_high[block_size] * 4;
  const int num_cols = (width + block_w - 1) / block_w;
  const int num_rows = (height + block_h - 1) / block_h;
  double *best_unsharp_amounts =
      aom_calloc(num_cols * num_rows, sizeof(*best_unsharp_amounts));
  if (!best_unsharp_amounts) {
    aom_free_frame_buffer(&blurred);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    return;
  }

  // Standalone block-sized frames handed to the per-block search.
  YV12_BUFFER_CONFIG source_block, blurred_block;
  memset(&source_block, 0, sizeof(source_block));
  memset(&blurred_block, 0, sizeof(blurred_block));
  if (aom_alloc_frame_buffer(&source_block, block_w, block_h, ss_x, ss_y,
                             cm->seq_params->use_highbitdepth,
                             cpi->oxcf.border_in_pixels,
                             cm->features.byte_alignment, 0) ||
      aom_alloc_frame_buffer(&blurred_block, block_w, block_h, ss_x, ss_y,
                             cm->seq_params->use_highbitdepth,
                             cpi->oxcf.border_in_pixels,
                             cm->features.byte_alignment, 0)) {
    aom_free_frame_buffer(&source_block);
    aom_free_frame_buffer(&blurred_block);
    aom_free_frame_buffer(&blurred);
    aom_free(best_unsharp_amounts);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    return;
  }

  // Pass 1: find the best amount for every block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset_y = row * block_h;
      const int col_offset_y = col * block_w;
      // Part of the block that lies inside the picture.
      const int block_width = AOMMIN(width - col_offset_y, block_w);
      const int block_height = AOMMIN(height - row_offset_y, block_h);
      const int index = col + row * num_cols;

      if (cm->seq_params->use_highbitdepth) {
        assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
        assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
        uint16_t *frame_src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) +
                                  row_offset_y * source->y_stride +
                                  col_offset_y;
        uint16_t *frame_blurred_buf = CONVERT_TO_SHORTPTR(blurred.y_buffer) +
                                      row_offset_y * blurred.y_stride +
                                      col_offset_y;
        uint16_t *blurred_dst = CONVERT_TO_SHORTPTR(blurred_block.y_buffer);
        uint16_t *src_dst = CONVERT_TO_SHORTPTR(source_block.y_buffer);

        // Copy block from source frame; zero-fill outside the picture.
        for (int i = 0; i < block_h; ++i) {
          for (int j = 0; j < block_w; ++j) {
            if (i >= block_height || j >= block_width) {
              src_dst[j] = 0;
              blurred_dst[j] = 0;
            } else {
              src_dst[j] = frame_src_buf[j];
              blurred_dst[j] = frame_blurred_buf[j];
            }
          }
          frame_src_buf += source->y_stride;
          frame_blurred_buf += blurred.y_stride;
          src_dst += source_block.y_stride;
          blurred_dst += blurred_block.y_stride;
        }
      } else {
        uint8_t *frame_src_buf =
            source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
        uint8_t *frame_blurred_buf =
            blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
        uint8_t *blurred_dst = blurred_block.y_buffer;
        uint8_t *src_dst = source_block.y_buffer;

        // Copy block from source frame; zero-fill outside the picture.
        for (int i = 0; i < block_h; ++i) {
          for (int j = 0; j < block_w; ++j) {
            if (i >= block_height || j >= block_width) {
              src_dst[j] = 0;
              blurred_dst[j] = 0;
            } else {
              src_dst[j] = frame_src_buf[j];
              blurred_dst[j] = frame_blurred_buf[j];
            }
          }
          frame_src_buf += source->y_stride;
          frame_blurred_buf += blurred.y_stride;
          src_dst += source_block.y_stride;
          blurred_dst += blurred_block.y_stride;
        }
      }

      best_unsharp_amounts[index] = find_best_frame_unsharp_amount(
          cpi, &source_block, &blurred_block, best_frame_unsharp_amount, 0.1, 3,
          1.5);
    }
  }

  // Pass 2: apply the per-block amounts to the source frame in place.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset_y = row * block_h;
      const int col_offset_y = col * block_w;
      const int block_width = AOMMIN(width - col_offset_y, block_w);
      const int block_height = AOMMIN(height - row_offset_y, block_h);
      const int index = col + row * num_cols;

      if (cm->seq_params->use_highbitdepth) {
        assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
        assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
        uint16_t *src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) +
                            row_offset_y * source->y_stride + col_offset_y;
        uint16_t *blurred_buf = CONVERT_TO_SHORTPTR(blurred.y_buffer) +
                                row_offset_y * blurred.y_stride + col_offset_y;
        highbd_unsharp_rect(src_buf, source->y_stride, blurred_buf,
                            blurred.y_stride, src_buf, source->y_stride,
                            block_width, block_height,
                            best_unsharp_amounts[index], bit_depth);
      } else {
        uint8_t *src_buf =
            source->y_buffer + row_offset_y * source->y_stride + col_offset_y;
        uint8_t *blurred_buf =
            blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
        unsharp_rect(src_buf, source->y_stride, blurred_buf, blurred.y_stride,
                     src_buf, source->y_stride, block_width, block_height,
                     best_unsharp_amounts[index]);
      }
    }
  }

  aom_free_frame_buffer(&source_block);
  aom_free_frame_buffer(&blurred_block);
  aom_free_frame_buffer(&blurred);
  aom_free(best_unsharp_amounts);
}
607 
// Fills cpi->vmaf_info.rdmult_scaling_factors[] with one weight per block.
//
// The source frame is downscaled by resize_factor (2) and walked in
// BLOCK_32X32 units. For every block:
//   1. the 'vf' function of that block size is run on the downscaled source
//      and its Gaussian-blurred copy, with the SSE output stored in
//      sses[index];
//   2. the block of 'recon' (a copy of the downscaled source) is overwritten
//      from the blurred frame and the (source, recon) pair is handed to the
//      VMAF context under 'index';
//   3. the block of 'recon' is rewritten from the downscaled source so that
//      the next submission only differs in its own block.
// Once the context is flushed, each weight is computed from mse / dvmaf, where
// dvmaf = kBaselineVmaf - (VMAF at that index), and passed through a fitted
// curve.
//
// All temporary buffers, including 'recon', are released before returning.
// NOTE(review): the return values of aom_alloc_frame_buffer() are not checked
// here; confirm whether allocation failure needs explicit handling.
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  const int y_width = cpi->source->y_width;
  const int y_height = cpi->source->y_height;
  const int resized_block_size = BLOCK_32X32;
  const int resize_factor = 2;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int ss_x = cpi->source->subsampling_x;
  const int ss_y = cpi->source->subsampling_y;

  // Downscaled copy of the source frame.
  YV12_BUFFER_CONFIG resized_source;
  memset(&resized_source, 0, sizeof(resized_source));
  aom_alloc_frame_buffer(
      &resized_source, y_width / resize_factor, y_height / resize_factor, ss_x,
      ss_y, cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
      cm->features.byte_alignment, 0);
  av1_resize_and_extend_frame_nonnormative(cpi->source, &resized_source,
                                           bit_depth, av1_num_planes(cm));

  const int resized_y_width = resized_source.y_width;
  const int resized_y_height = resized_source.y_height;
  const int resized_block_w = mi_size_wide[resized_block_size] * 4;
  const int resized_block_h = mi_size_high[resized_block_size] * 4;
  // Block grid dimensions, rounded up to cover partial blocks.
  const int num_cols =
      (resized_y_width + resized_block_w - 1) / resized_block_w;
  const int num_rows =
      (resized_y_height + resized_block_h - 1) / resized_block_h;

  // Gaussian-blurred version of the downscaled source.
  YV12_BUFFER_CONFIG blurred;
  memset(&blurred, 0, sizeof(blurred));
  aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, ss_x,
                         ss_y, cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);
  gaussian_blur(bit_depth, &resized_source, &blurred);

  // Working frame: starts as a copy of the downscaled source and has exactly
  // one block replaced at a time.
  YV12_BUFFER_CONFIG recon;
  memset(&recon, 0, sizeof(recon));
  aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, ss_x, ss_y,
                         cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);
  aom_yv12_copy_frame(&resized_source, &recon, 1);

  VmafContext *vmaf_context;
  const bool cal_vmaf_neg =
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
  aom_init_vmaf_context(&vmaf_context, cpi->vmaf_info.vmaf_model, cal_vmaf_neg);
  unsigned int *sses = aom_calloc(num_rows * num_cols, sizeof(*sses));
  if (!sses) {
    // Release everything acquired so far before reporting the failure, so
    // nothing is leaked whether or not the error handler returns.
    aom_free_frame_buffer(&resized_source);
    aom_free_frame_buffer(&blurred);
    aom_free_frame_buffer(&recon);
    aom_close_vmaf_context(vmaf_context);
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Error allocating vmaf data");
    return;
  }

  // Loop through each 'block_size' block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      const int row_offset_y = row * resized_block_h;
      const int col_offset_y = col * resized_block_w;

      uint8_t *const orig_buf = resized_source.y_buffer +
                                row_offset_y * resized_source.y_stride +
                                col_offset_y;
      uint8_t *const blurred_buf =
          blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;

      cpi->ppi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride,
                                              blurred_buf, blurred.y_stride,
                                              &sses[index]);

      uint8_t *const recon_buf =
          recon.y_buffer + row_offset_y * recon.y_stride + col_offset_y;
      // Set recon buf: write the blurred block into recon. The unsharp amount
      // is 0.0 (presumably making this a plain block copy -- unsharp_rect is
      // defined elsewhere in the file).
      if (cpi->common.seq_params->use_highbitdepth) {
        highbd_unsharp_rect(CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
                            CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
                            CONVERT_TO_SHORTPTR(recon_buf), recon.y_stride,
                            resized_block_w, resized_block_h, 0.0, bit_depth);
      } else {
        unsharp_rect(blurred_buf, blurred.y_stride, blurred_buf,
                     blurred.y_stride, recon_buf, recon.y_stride,
                     resized_block_w, resized_block_h, 0.0);
      }

      aom_read_vmaf_image(vmaf_context, &resized_source, &recon, bit_depth,
                          index);

      // Restore recon buf from the downscaled source.
      if (cpi->common.seq_params->use_highbitdepth) {
        highbd_unsharp_rect(
            CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
            CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
            CONVERT_TO_SHORTPTR(recon_buf), recon.y_stride, resized_block_w,
            resized_block_h, 0.0, bit_depth);
      } else {
        unsharp_rect(orig_buf, resized_source.y_stride, orig_buf,
                     resized_source.y_stride, recon_buf, recon.y_stride,
                     resized_block_w, resized_block_h, 0.0);
      }
    }
  }
  aom_flush_vmaf_context(vmaf_context);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      const double vmaf = aom_calc_vmaf_at_index(
          vmaf_context, cpi->vmaf_info.vmaf_model, index);
      const double dvmaf = kBaselineVmaf - vmaf;

      // The block SSE is normalized by the whole downscaled frame area.
      const double mse =
          (double)sses[index] / (double)(resized_y_width * resized_y_height);
      double weight;
      const double eps = 0.01 / (num_rows * num_cols);
      // Fall back to a neutral ratio when either term is too small to give a
      // meaningful quotient.
      if (dvmaf < eps || mse < eps) {
        weight = 1.0;
      } else {
        weight = mse / dvmaf;
      }

      // Normalize it with a data fitted model.
      weight = 6.0 * (1.0 - exp(-0.05 * weight)) + 0.8;
      cpi->vmaf_info.rdmult_scaling_factors[index] = weight;
    }
  }

  aom_free_frame_buffer(&resized_source);
  aom_free_frame_buffer(&blurred);
  // 'recon' was allocated above and must be released as well.
  aom_free_frame_buffer(&recon);
  aom_close_vmaf_context(vmaf_context);
  aom_free(sses);
}
739 
// Scales *rdmult by the geometric mean of the VMAF rdmult scaling factors of
// the BLOCK_64X64 base blocks covered by the block of size 'bsize' located at
// (mi_row, mi_col), then updates x->errorperbit for the new value.
//
//   cpi     encoder instance; supplies the frame dimensions in mi units and
//           cpi->vmaf_info.rdmult_scaling_factors[].
//   x       macroblock whose errorperbit is refreshed.
//   bsize   size of the block being coded.
//   mi_row  block row position in mi units.
//   mi_col  block column position in mi units.
//   rdmult  in/out: rd multiplier; the result is clamped to be >= 0.
void av1_set_vmaf_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                         const BLOCK_SIZE bsize, const int mi_row,
                         const int mi_col, int *const rdmult) {
  const AV1_COMMON *const cm = &cpi->common;

  const int bsize_base = BLOCK_64X64;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;

  // Rows are measured in units of the base block height and columns in units
  // of its width.
  const int row_start = mi_row / num_mi_h;
  const int col_start = mi_col / num_mi_w;

  double log_sum = 0.0;
  int num_of_blocks = 0;
  for (int row = row_start; row < num_rows && row < row_start + num_brows;
       ++row) {
    for (int col = col_start; col < num_cols && col < col_start + num_bcols;
         ++col) {
      const int index = row * num_cols + col;
      log_sum += log(cpi->vmaf_info.rdmult_scaling_factors[index]);
      ++num_of_blocks;
    }
  }

  // With no covered base block the mean is undefined (0 / 0); use a neutral
  // scale instead of converting NaN to int.
  const double geom_mean_of_scale =
      num_of_blocks > 0 ? exp(log_sum / (double)num_of_blocks) : 1.0;

  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
  *rdmult = AOMMAX(*rdmult, 0);
  av1_set_error_per_bit(&x->errorperbit, *rdmult);
}
771 
772 // TODO(sdeng): replace them with the SIMD versions.
// Mean absolute difference between two 16-bit sample planes.
//   src, ref                 top-left samples of the two planes.
//   src_stride, ref_stride   distance between rows, in samples.
//   w, h                     dimensions of the compared region.
// Returns the sum of |src - ref| over the region divided by w * h.
static AOM_INLINE double highbd_image_sad_c(const uint16_t *src, int src_stride,
                                            const uint16_t *ref, int ref_stride,
                                            int w, int h) {
  double total = 0.0;

  for (int y = 0; y < h; ++y) {
    const uint16_t *const src_row = src + y * src_stride;
    const uint16_t *const ref_row = ref + y * ref_stride;
    for (int x = 0; x < w; ++x) {
      const double a = src_row[x];
      const double b = ref_row[x];
      total += fabs(a - b);
    }
  }

  return total / (double)(h * w);
}
790 
// Mean absolute difference between two 8-bit sample planes.
//   src, ref                 top-left samples of the two planes.
//   src_stride, ref_stride   distance between rows, in samples.
//   w, h                     dimensions of the compared region.
// Returns the sum of |src - ref| over the region divided by w * h.
static AOM_INLINE double image_sad_c(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride, int w,
                                     int h) {
  double total = 0.0;

  for (int y = 0; y < h; ++y) {
    const uint8_t *const src_row = src + y * src_stride;
    const uint8_t *const ref_row = ref + y * ref_stride;
    for (int x = 0; x < w; ++x) {
      const double a = src_row[x];
      const double b = ref_row[x];
      total += fabs(a - b);
    }
  }

  return total / (double)(h * w);
}
808 
calc_vmaf_motion_score(const AV1_COMP * const cpi,const AV1_COMMON * const cm,const YV12_BUFFER_CONFIG * const cur,const YV12_BUFFER_CONFIG * const last,const YV12_BUFFER_CONFIG * const next)809 static double calc_vmaf_motion_score(const AV1_COMP *const cpi,
810                                      const AV1_COMMON *const cm,
811                                      const YV12_BUFFER_CONFIG *const cur,
812                                      const YV12_BUFFER_CONFIG *const last,
813                                      const YV12_BUFFER_CONFIG *const next) {
814   const int y_width = cur->y_width;
815   const int y_height = cur->y_height;
816   YV12_BUFFER_CONFIG blurred_cur, blurred_last, blurred_next;
817   const int bit_depth = cpi->td.mb.e_mbd.bd;
818   const int ss_x = cur->subsampling_x;
819   const int ss_y = cur->subsampling_y;
820 
821   memset(&blurred_cur, 0, sizeof(blurred_cur));
822   memset(&blurred_last, 0, sizeof(blurred_last));
823   memset(&blurred_next, 0, sizeof(blurred_next));
824 
825   aom_alloc_frame_buffer(&blurred_cur, y_width, y_height, ss_x, ss_y,
826                          cm->seq_params->use_highbitdepth,
827                          cpi->oxcf.border_in_pixels,
828                          cm->features.byte_alignment, 0);
829   aom_alloc_frame_buffer(&blurred_last, y_width, y_height, ss_x, ss_y,
830                          cm->seq_params->use_highbitdepth,
831                          cpi->oxcf.border_in_pixels,
832                          cm->features.byte_alignment, 0);
833   aom_alloc_frame_buffer(&blurred_next, y_width, y_height, ss_x, ss_y,
834                          cm->seq_params->use_highbitdepth,
835                          cpi->oxcf.border_in_pixels,
836                          cm->features.byte_alignment, 0);
837 
838   gaussian_blur(bit_depth, cur, &blurred_cur);
839   gaussian_blur(bit_depth, last, &blurred_last);
840   if (next) gaussian_blur(bit_depth, next, &blurred_next);
841 
842   double motion1, motion2 = 65536.0;
843   if (cm->seq_params->use_highbitdepth) {
844     assert(blurred_cur.flags & YV12_FLAG_HIGHBITDEPTH);
845     assert(blurred_last.flags & YV12_FLAG_HIGHBITDEPTH);
846     const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8));
847     motion1 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer),
848                                  blurred_cur.y_stride,
849                                  CONVERT_TO_SHORTPTR(blurred_last.y_buffer),
850                                  blurred_last.y_stride, y_width, y_height) *
851               scale_factor;
852     if (next) {
853       assert(blurred_next.flags & YV12_FLAG_HIGHBITDEPTH);
854       motion2 = highbd_image_sad_c(CONVERT_TO_SHORTPTR(blurred_cur.y_buffer),
855                                    blurred_cur.y_stride,
856                                    CONVERT_TO_SHORTPTR(blurred_next.y_buffer),
857                                    blurred_next.y_stride, y_width, y_height) *
858                 scale_factor;
859     }
860   } else {
861     motion1 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride,
862                           blurred_last.y_buffer, blurred_last.y_stride, y_width,
863                           y_height);
864     if (next) {
865       motion2 = image_sad_c(blurred_cur.y_buffer, blurred_cur.y_stride,
866                             blurred_next.y_buffer, blurred_next.y_stride,
867                             y_width, y_height);
868     }
869   }
870 
871   aom_free_frame_buffer(&blurred_cur);
872   aom_free_frame_buffer(&blurred_last);
873   aom_free_frame_buffer(&blurred_next);
874 
875   return AOMMIN(motion1, motion2);
876 }
877 
get_neighbor_frames(const AV1_COMP * const cpi,YV12_BUFFER_CONFIG ** last,YV12_BUFFER_CONFIG ** next)878 static AOM_INLINE void get_neighbor_frames(const AV1_COMP *const cpi,
879                                            YV12_BUFFER_CONFIG **last,
880                                            YV12_BUFFER_CONFIG **next) {
881   const AV1_COMMON *const cm = &cpi->common;
882   const GF_GROUP *gf_group = &cpi->ppi->gf_group;
883   const int src_index =
884       cm->show_frame != 0 ? 0 : gf_group->arf_src_offset[cpi->gf_frame_index];
885   struct lookahead_entry *last_entry = av1_lookahead_peek(
886       cpi->ppi->lookahead, src_index - 1, cpi->compressor_stage);
887   struct lookahead_entry *next_entry = av1_lookahead_peek(
888       cpi->ppi->lookahead, src_index + 1, cpi->compressor_stage);
889   *next = &next_entry->img;
890   *last = cm->show_frame ? cpi->last_source : &last_entry->img;
891 }
892 
893 // Calculates the new qindex from the VMAF motion score. This is based on the
894 // observation: when the motion score becomes higher, the VMAF score of the
895 // same source and distorted frames would become higher.
av1_get_vmaf_base_qindex(const AV1_COMP * const cpi,int current_qindex)896 int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
897   const AV1_COMMON *const cm = &cpi->common;
898   if (cm->current_frame.frame_number == 0 || cpi->oxcf.pass == 1) {
899     return current_qindex;
900   }
901   const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
902   const int layer_depth =
903       AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
904   const double last_frame_ysse =
905       get_layer_value(cpi->vmaf_info.last_frame_ysse, layer_depth);
906   const double last_frame_vmaf =
907       get_layer_value(cpi->vmaf_info.last_frame_vmaf, layer_depth);
908   const int bit_depth = cpi->td.mb.e_mbd.bd;
909   const double approx_sse = last_frame_ysse / (double)((1 << (bit_depth - 8)) *
910                                                        (1 << (bit_depth - 8)));
911   const double approx_dvmaf = kBaselineVmaf - last_frame_vmaf;
912   const double sse_threshold =
913       0.01 * cpi->source->y_width * cpi->source->y_height;
914   const double vmaf_threshold = 0.01;
915   if (approx_sse < sse_threshold || approx_dvmaf < vmaf_threshold) {
916     return current_qindex;
917   }
918   YV12_BUFFER_CONFIG *cur_buf = cpi->source;
919   if (cm->show_frame == 0) {
920     const int src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
921     struct lookahead_entry *cur_entry = av1_lookahead_peek(
922         cpi->ppi->lookahead, src_index, cpi->compressor_stage);
923     cur_buf = &cur_entry->img;
924   }
925   assert(cur_buf);
926 
927   YV12_BUFFER_CONFIG *next_buf, *last_buf;
928   get_neighbor_frames(cpi, &last_buf, &next_buf);
929   assert(last_buf);
930 
931   const double motion =
932       calc_vmaf_motion_score(cpi, cm, cur_buf, last_buf, next_buf);
933 
934   // Get dVMAF through a data fitted model.
935   const double dvmaf = 26.11 * (1.0 - exp(-0.06 * motion));
936   const double dsse = dvmaf * approx_sse / approx_dvmaf;
937 
938   const double beta = approx_sse / (dsse + approx_sse);
939   const int offset =
940       av1_get_deltaq_offset(cm->seq_params->bit_depth, current_qindex, beta);
941   int qindex = current_qindex + offset;
942 
943   qindex = AOMMIN(qindex, MAXQ);
944   qindex = AOMMAX(qindex, MINQ);
945 
946   return qindex;
947 }
948 
cal_approx_score(AV1_COMP * const cpi,double src_variance,double new_variance,double src_score,YV12_BUFFER_CONFIG * const src,YV12_BUFFER_CONFIG * const recon_sharpened)949 static AOM_INLINE double cal_approx_score(
950     AV1_COMP *const cpi, double src_variance, double new_variance,
951     double src_score, YV12_BUFFER_CONFIG *const src,
952     YV12_BUFFER_CONFIG *const recon_sharpened) {
953   double score;
954   const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
955   const bool cal_vmaf_neg =
956       cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
957   aom_calc_vmaf(cpi->vmaf_info.vmaf_model, src, recon_sharpened, bit_depth,
958                 cal_vmaf_neg, &score);
959   return src_variance / new_variance * (score - src_score);
960 }
961 
// Steps the unsharp amount away from 'unsharp_amount_start' by 'step_size'
// (which may be negative) for as long as the approximate score keeps
// improving.
//
// Each step sharpens both 'recon' and 'src' with the candidate amount (into
// recon_sharpened / src_sharpened), measures the residual variance of the
// sharpened source against 'ref', and evaluates cal_approx_score(). The walk
// ends when the candidate amount leaves [0, max_amount], when the score does
// not increase, or after 'max_loop_count' evaluations.
//
// Returns the last amount whose score was an improvement, clamped to
// [0, max_amount].
static double find_best_frame_unsharp_amount_loop_neg(
    AV1_COMP *const cpi, double src_variance, double base_score,
    YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon,
    YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const src_blurred,
    YV12_BUFFER_CONFIG *const recon_blurred,
    YV12_BUFFER_CONFIG *const src_sharpened,
    YV12_BUFFER_CONFIG *const recon_sharpened, FULLPEL_MV *mvs,
    double best_score, const double unsharp_amount_start,
    const double step_size, const int max_loop_count, const double max_amount) {
  const double min_amount = 0.0;
  double amount = unsharp_amount_start;
  double candidate_score = best_score;
  int iterations = 0;

  for (;;) {
    // The score of the previous step becomes the one to beat.
    best_score = candidate_score;
    amount += step_size;
    if (amount > max_amount || amount < min_amount) break;

    unsharp(cpi, recon, recon_blurred, recon_sharpened, amount);
    unsharp(cpi, src, src_blurred, src_sharpened, amount);
    const double new_variance =
        residual_frame_average_variance(cpi, src_sharpened, ref, mvs);
    candidate_score = cal_approx_score(cpi, src_variance, new_variance,
                                       base_score, src, recon_sharpened);

    ++iterations;
    if (!(candidate_score > best_score)) break;
    if (!(iterations < max_loop_count)) break;
  }

  // Undo the last step unless it produced an improvement.
  if (!(candidate_score > best_score)) {
    amount = amount - step_size;
  }

  return AOMMIN(max_amount, AOMMAX(amount, min_amount));
}
994 
// Searches for the frame-level unsharp amount that maximizes the approximate
// score.
//
// The score is evaluated at 'unsharp_amount_start' and at
// 'unsharp_amount_start + step_size'. If the second is higher, the search
// continues upwards from it; otherwise it continues downwards from the start
// amount. The stepping itself is done by
// find_best_frame_unsharp_amount_loop_neg().
//
// Blurred and sharpened scratch frames for both 'src' and 'recon' are
// allocated here and freed before returning.
static double find_best_frame_unsharp_amount_neg(
    AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const src,
    YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref,
    double base_score, const double unsharp_amount_start,
    const double step_size, const int max_loop_count,
    const double max_filter_amount) {
  // No motion vectors are supplied to the variance computation.
  FULLPEL_MV *mvs = NULL;
  const double src_variance =
      residual_frame_average_variance(cpi, src, ref, mvs);

  const AV1_COMMON *const cm = &cpi->common;
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const int frame_w = recon->y_width;
  const int frame_h = recon->y_height;
  const int ss_x = recon->subsampling_x;
  const int ss_y = recon->subsampling_y;

  YV12_BUFFER_CONFIG src_blurred, recon_blurred, src_sharpened, recon_sharpened;

  memset(&recon_sharpened, 0, sizeof(recon_sharpened));
  memset(&src_sharpened, 0, sizeof(src_sharpened));
  memset(&recon_blurred, 0, sizeof(recon_blurred));
  memset(&src_blurred, 0, sizeof(src_blurred));

  aom_alloc_frame_buffer(&recon_sharpened, frame_w, frame_h, ss_x, ss_y,
                         cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);
  aom_alloc_frame_buffer(&src_sharpened, frame_w, frame_h, ss_x, ss_y,
                         cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);
  aom_alloc_frame_buffer(&recon_blurred, frame_w, frame_h, ss_x, ss_y,
                         cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);
  aom_alloc_frame_buffer(&src_blurred, frame_w, frame_h, ss_x, ss_y,
                         cm->seq_params->use_highbitdepth,
                         cpi->oxcf.border_in_pixels,
                         cm->features.byte_alignment, 0);

  gaussian_blur(bit_depth, recon, &recon_blurred);
  gaussian_blur(bit_depth, src, &src_blurred);

  // Score at the starting amount.
  unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_start);
  unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_start);
  const double variance_start =
      residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
  const double score_start = cal_approx_score(
      cpi, src_variance, variance_start, base_score, src, &recon_sharpened);

  // Score one step above the starting amount.
  const double unsharp_amount_next = unsharp_amount_start + step_size;
  unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_next);
  unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_next);
  const double variance_next =
      residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
  const double score_next = cal_approx_score(cpi, src_variance, variance_next,
                                             base_score, src, &recon_sharpened);

  // Pick the search direction: upwards from the stepped amount when that
  // scored higher, otherwise downwards from the starting amount.
  const bool search_up = score_next > score_start;
  const double seed_score = search_up ? score_next : score_start;
  const double seed_amount =
      search_up ? unsharp_amount_next : unsharp_amount_start;
  const double signed_step = search_up ? step_size : -step_size;

  const double unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
      cpi, src_variance, base_score, src, recon, ref, &src_blurred,
      &recon_blurred, &src_sharpened, &recon_sharpened, mvs, seed_score,
      seed_amount, signed_step, max_loop_count, max_filter_amount);

  aom_free_frame_buffer(&recon_sharpened);
  aom_free_frame_buffer(&src_sharpened);
  aom_free_frame_buffer(&recon_blurred);
  aom_free_frame_buffer(&src_blurred);
  aom_free(mvs);
  return unsharp_amount;
}
1071 
// Records per-layer statistics of the frame that was just reconstructed.
//
// For the current layer depth (capped at MAX_ARF_LAYERS - 1) this stores, in
// cpi->vmaf_info:
//   - last_frame_vmaf: the VMAF score of the reconstruction vs. cpi->source;
//   - last_frame_ysse: the luma SSE of the same pair;
//   - last_frame_unsharp_amount: only when tuning is
//     AOM_TUNE_VMAF_NEG_MAX_GAIN, the result of a search for the best unsharp
//     amount that starts from the value stored for this layer.
void av1_update_vmaf_curve(AV1_COMP *cpi) {
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const int layer =
      AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
  const int bit_depth = cpi->td.mb.e_mbd.bd;
  const bool cal_vmaf_neg =
      cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
  YV12_BUFFER_CONFIG *source = cpi->source;
  YV12_BUFFER_CONFIG *recon = &cpi->common.cur_frame->buf;

  double base_score;
  aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, recon, bit_depth,
                cal_vmaf_neg, &base_score);
  cpi->vmaf_info.last_frame_vmaf[layer] = base_score;

  if (!cpi->common.seq_params->use_highbitdepth) {
    cpi->vmaf_info.last_frame_ysse[layer] =
        (double)aom_get_y_sse(source, recon);
  } else {
    assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
    assert(recon->flags & YV12_FLAG_HIGHBITDEPTH);
    cpi->vmaf_info.last_frame_ysse[layer] =
        (double)aom_highbd_get_y_sse(source, recon);
  }

  // The unsharp amount is only tracked for the NEG max-gain tuning mode.
  if (!cal_vmaf_neg) return;

  YV12_BUFFER_CONFIG *last, *next;
  get_neighbor_frames(cpi, &last, &next);
  const int max_loop_count = 5;
  double best_unsharp_amount_start =
      get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer);
  cpi->vmaf_info.last_frame_unsharp_amount[layer] =
      find_best_frame_unsharp_amount_neg(cpi, source, recon, last, base_score,
                                         best_unsharp_amount_start, 0.025,
                                         max_loop_count, 1.01);
}
1107