• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 
14 #include "config/aom_config.h"
15 
16 #include "aom_util/aom_pthread.h"
17 
18 #if CONFIG_TFLITE
19 #include "tensorflow/lite/c/c_api.h"
20 #include "av1/encoder/deltaq4_model.c"
21 #endif
22 
23 #include "av1/common/common_data.h"
24 #include "av1/common/enums.h"
25 #include "av1/common/idct.h"
26 #include "av1/common/reconinter.h"
27 #include "av1/encoder/allintra_vis.h"
28 #include "av1/encoder/aq_variance.h"
29 #include "av1/encoder/encoder.h"
30 #include "av1/encoder/ethread.h"
31 #include "av1/encoder/hybrid_fwd_txfm.h"
32 #include "av1/encoder/model_rd.h"
33 #include "av1/encoder/rdopt_utils.h"
34 
35 #define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128
36 #define MB_WIENER_PRED_BUF_STRIDE 128
37 
38 // Maximum delta-q range allowed for Variance Boost after scaling
39 #define VAR_BOOST_MAX_DELTAQ_RANGE 80
40 // Maximum quantization step boost allowed for Variance Boost
41 #define VAR_BOOST_MAX_BOOST 8.0
42 
// Allocates the per-thread temporary prediction buffer used while computing
// macroblock Wiener-variance statistics. The buffer holds one
// MB_WIENER_PRED_BLOCK_SIZE block and is doubled in byte size for high
// bit-depth (16-bit sample) content.
void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) {
  const int hbd_shift = is_cur_buf_hbd(&td->mb.e_mbd);
  assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL);
  const int pred_w = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE];
  const int pred_h = block_size_high[MB_WIENER_PRED_BLOCK_SIZE];
  // The fixed stride used by consumers must match the block width.
  assert(pred_w == MB_WIENER_PRED_BUF_STRIDE);
  const size_t num_bytes =
      (pred_w * pred_h * sizeof(*td->wiener_tmp_pred_buf)) << hbd_shift;
  CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, num_bytes));
}
54 
// Frees the temporary Wiener-variance prediction buffer and clears the
// stale pointer so a later alloc/dealloc cycle is safe.
void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) {
  aom_free(td->wiener_tmp_pred_buf);
  td->wiener_tmp_pred_buf = NULL;
}
59 
// Lazily allocates the per-8x8 Wiener statistics buffers (and, when
// rate-guided delta-q is enabled, the rate-estimate and external-rate
// buffers). Buffers are zero-initialized and persist across frames.
void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;
  const int num_mbs = cpi->frame_info.mi_rows * cpi->frame_info.mi_cols;

  // This block size is also used to determine number of workers in
  // multi-threading. If it is changed, one needs to change it accordingly in
  // "compute_num_ai_workers()".
  cpi->weber_bsize = BLOCK_8X8;

  if (cpi->oxcf.enable_rate_guide_deltaq) {
    if (cpi->mb_weber_stats && cpi->prep_rate_estimates &&
        cpi->ext_rate_distribution)
      return;
  } else {
    if (cpi->mb_weber_stats) return;
  }

  // Guard each allocation individually: if only a subset of the buffers was
  // allocated earlier (e.g. rate-guided delta-q toggled between frames),
  // unconditionally re-allocating would leak the still-live buffers.
  if (!cpi->mb_weber_stats) {
    CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
                    aom_calloc(num_mbs, sizeof(*cpi->mb_weber_stats)));
  }

  if (cpi->oxcf.enable_rate_guide_deltaq) {
    if (!cpi->prep_rate_estimates) {
      CHECK_MEM_ERROR(cm, cpi->prep_rate_estimates,
                      aom_calloc(num_mbs, sizeof(*cpi->prep_rate_estimates)));
    }
    if (!cpi->ext_rate_distribution) {
      CHECK_MEM_ERROR(
          cm, cpi->ext_rate_distribution,
          aom_calloc(num_mbs, sizeof(*cpi->ext_rate_distribution)));
    }
  }
}
92 
// Returns the average per-8x8 SATD over the block anchored at
// (mi_row, mi_col), clamped to at least 1. 8x8 cells that fall outside the
// frame are skipped.
static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                        int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  const int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t satd = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
                  .satd;
      ++mb_count;
    }
  }

  // Average without narrowing: the function returns int64_t, and the
  // previous (int) casts could truncate a large accumulated SATD.
  if (mb_count) satd /= mb_count;
  satd = AOMMAX(1, satd);

  return satd;
}
120 
// Returns the average per-8x8 reconstruction distortion (SSE) over the
// block anchored at (mi_row, mi_col), clamped to at least 1. 8x8 cells that
// fall outside the frame are skipped.
static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                       int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  const int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t distortion = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      distortion +=
          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
              .distortion;
      ++mb_count;
    }
  }

  // Average without narrowing: the function returns int64_t, and the
  // previous (int) casts could truncate a large accumulated distortion.
  if (mb_count) distortion /= mb_count;
  distortion = AOMMAX(1, distortion);

  return distortion;
}
149 
// Returns the smallest per-8x8 max_scale inside the block at
// (mi_row, mi_col), considering only cells whose max_scale is at least 1.0
// and that lie inside the frame. Falls back to 10.0 when no qualifying cell
// exists.
static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int step = mi_size_wide[cpi->weber_bsize];
  const int stride = cpi->frame_info.mi_cols;
  const int row_end = mi_row + mi_size_high[bsize];
  const int col_end = mi_col + mi_size_wide[bsize];
  double smallest = 10.0;

  for (int r = mi_row; r < row_end; r += step) {
    for (int c = mi_col; c < col_end; c += step) {
      if (r >= cm->mi_params.mi_rows || c >= cm->mi_params.mi_cols) continue;
      const double cell_scale =
          cpi->mb_weber_stats[(r / step) * stride + (c / step)].max_scale;
      // Cells with sub-unity scale carry no usable limit; skip them.
      if (cell_scale >= 1.0 && cell_scale < smallest) smallest = cell_scale;
    }
  }
  return smallest;
}
172 
// Computes the window Wiener variance for the block at (mi_row, mi_col):
// an average of a regularized ratio built from per-8x8 distortion,
// source/reconstruction variance and peak pixel values. Result is >= 1.
static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int sb_wiener_var = 0;
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  double base_num = 1;
  double base_den = 1;
  double base_reg = 1;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      const WeberStats *weber_stats =
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];

      base_num += ((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_variance) *
                  weber_stats->rec_pix_max;

      base_den += fabs(
          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));

      // Regularizer keeps the ratio stable for near-zero denominators.
      base_reg += sqrt((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_pix_max) * 0.1;
      ++mb_count;
    }
  }

  // Guard the division: a window placed entirely outside the frame leaves
  // mb_count == 0, and dividing by it would produce infinity whose
  // conversion to int is undefined behavior.
  if (mb_count == 0) return 1;

  sb_wiener_var =
      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
  sb_wiener_var = AOMMAX(1, sb_wiener_var);

  return sb_wiener_var;
}
215 
// Perceptual AI variance for the block at (mi_row, mi_col): the window
// Wiener variance at the block itself, further reduced by the minimum over
// the four half-block-shifted neighbor windows (up, down, left, right) that
// fit inside the frame.
static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int half_w = mi_wide / 2;
  const int half_h = mi_high / 2;

  int var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);

  // Probe each half-shifted window that stays in bounds; keep the smallest.
  if (mi_row >= half_h)
    var = AOMMIN(var,
                 get_window_wiener_var(cpi, bsize, mi_row - half_h, mi_col));
  if (mi_row <= cm->mi_params.mi_rows - mi_high - half_h)
    var = AOMMIN(var,
                 get_window_wiener_var(cpi, bsize, mi_row + half_h, mi_col));
  if (mi_col >= half_w)
    var = AOMMIN(var,
                 get_window_wiener_var(cpi, bsize, mi_row, mi_col - half_w));
  if (mi_col <= cm->mi_params.mi_cols - mi_wide - half_w)
    var = AOMMIN(var,
                 get_window_wiener_var(cpi, bsize, mi_row, mi_col + half_w));

  return var;
}
247 
// Crude rate estimate for a quantized transform block: roughly 1 bit of
// overhead plus log2(1 + |level|) + 1 bits per nonzero coefficient, scanned
// in DCT_DCT order up to eob, returned in AV1_PROB_COST units.
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const SCAN_ORDER *const scan = &av1_scan_orders[tx_size][DCT_DCT];

  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  int bits = 1;

  for (int i = 0; i < eob; ++i) {
    const int level = abs(qcoeff[scan->scan[i]]);
    bits += (int)(log1p(level) / log(2.0)) + 1 + (level > 0);
  }

  return bits << AV1_PROB_COST_SHIFT;
}
261 
// Computes Wiener-variance statistics (WeberStats) for every weber_bsize
// (8x8) block in one row of blocks. For each block: searches all intra
// modes for the lowest-SATD predictor, transforms/quantizes/reconstructs
// the residual, and accumulates source vs. reconstruction pixel moments.
// Also records per-block rate estimates when rate-guided delta-q is on.
// Safe to run from worker threads: progress is coordinated through the
// intra_mt sync read/write hooks at mt_unit_step column granularity.
// Note: sum_rec_distortion / sum_est_rate are only updated when
// auto_intra_tools_off is enabled.
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
                                MACROBLOCKD *xd, const int mi_row,
                                int16_t *src_diff, tran_low_t *coeff,
                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
                                double *sum_rec_distortion,
                                double *sum_est_rate, uint8_t *pred_buffer) {
  AV1_COMMON *const cm = &cpi->common;
  uint8_t *buffer = cpi->source->y_buffer;
  int buf_stride = cpi->source->y_stride;
  // Stack-local mbmi stands in for the (not yet populated) frame mode info.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int block_size = tx_size_wide[tx_size];
  const int coeff_count = block_size * block_size;
  const int mb_step = mi_size_wide[bsize];
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
  const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  AV1EncRowMultiThreadSync *const intra_row_mt_sync =
      &cpi->ppi->intra_row_mt_sync;
  const int mi_cols = cm->mi_params.mi_cols;
  const int mt_thread_id = mi_row / mb_step;
  // TODO(chengchen): test different unit step size
  const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
  const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
  int mt_unit_col = 0;
  const int is_high_bitdepth = is_cur_buf_hbd(xd);

  uint8_t *dst_buffer = pred_buffer;
  const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;

  if (is_high_bitdepth) {
    uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
    dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
  }

  for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
    // Wait for the row above to finish the corresponding sync unit before
    // entering a new one.
    if (mi_col % mt_unit_step == 0) {
      intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
                                    mt_unit_col);
#if CONFIG_MULTITHREAD
      const int num_workers =
          AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
      if (num_workers > 1) {
        const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
        pthread_mutex_lock(enc_row_mt->mutex_);
        const bool exit = enc_row_mt->mb_wiener_mt_exit;
        pthread_mutex_unlock(enc_row_mt->mutex_);
        // Stop further processing in case any worker has encountered an error.
        if (exit) break;
      }
#endif
    }

    // --- Intra mode search: pick the mode with the lowest transform SATD.
    PREDICTION_MODE best_mode = DC_PRED;
    int best_intra_cost = INT_MAX;
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                          mi_row, mi_col);
    set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                   AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
                   AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
    set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                 av1_num_planes(cm));
    xd->mi[0]->bsize = bsize;
    xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
    // Set above and left mbmi to NULL as they are not available in the
    // preprocessing stage.
    // They are used to determine intra edge filter types in intra prediction.
    if (xd->up_available) {
      xd->above_mbmi = NULL;
    }
    if (xd->left_available) {
      xd->left_mbmi = NULL;
    }
    uint8_t *mb_buffer =
        buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
    for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
         ++mode) {
      // TODO(chengchen): Here we use src instead of reconstructed frame as
      // the intra predictor to make single and multithread version match.
      // Ideally we want to use the reconstructed.
      av1_predict_intra_block(
          xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
          block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
          mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
      int intra_cost = aom_satd(coeff, coeff_count);
      if (intra_cost < best_intra_cost) {
        best_intra_cost = intra_cost;
        best_mode = mode;
      }
    }

    // --- Re-run prediction with the winning mode, then transform/quantize.
    av1_predict_intra_block(
        xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
        block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
        mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
    av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
    av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

    const struct macroblock_plane *const p = &x->plane[0];
    uint16_t eob;
    const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
    QUANT_PARAM quant_param;
    int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
    av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd)) {
      av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                    scan_order, &quant_param);
    } else {
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                             scan_order, &quant_param);
    }
#else
    av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
                           &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH

    // Record the per-block rate estimate for rate-guided delta quantization.
    if (cpi->oxcf.enable_rate_guide_deltaq) {
      const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
      cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                               (mi_col / mb_step)] = rate_cost;
    }

    // Reconstruct into dst_buffer so distortion can be measured.
    av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                                dst_buffer_stride, eob, 0);
    WeberStats *weber_stats =
        &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                             (mi_col / mb_step)];

    weber_stats->rec_pix_max = 1;
    weber_stats->rec_variance = 0;
    weber_stats->src_pix_max = 1;
    weber_stats->src_variance = 0;
    weber_stats->distortion = 0;

    int64_t src_mean = 0;
    int64_t rec_mean = 0;
    int64_t dist_mean = 0;

    // --- Accumulate raw pixel moments over the block; the mean-squared
    // corrections are applied after the loop.
    for (int pix_row = 0; pix_row < block_size; ++pix_row) {
      for (int pix_col = 0; pix_col < block_size; ++pix_col) {
        int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
        if (is_cur_buf_hbd(xd)) {
          uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
          uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
          src_pix = src[pix_row * buf_stride + pix_col];
          rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
        } else {
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
        }
#else
        src_pix = mb_buffer[pix_row * buf_stride + pix_col];
        rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
        src_mean += src_pix;
        rec_mean += rec_pix;
        dist_mean += src_pix - rec_pix;
        weber_stats->src_variance += src_pix * src_pix;
        weber_stats->rec_variance += rec_pix * rec_pix;
        weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
        weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
        weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
      }
    }

    // Frame-level accumulators feed the automatic intra-tools-off decision.
    if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
      *sum_rec_distortion += weber_stats->distortion;
      int est_block_rate = 0;
      int64_t est_block_dist = 0;
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                      pix_num, &est_block_rate,
                                      &est_block_dist);
      *sum_est_rate += est_block_rate;
    }

    // Convert raw second moments into (unnormalized) variances:
    // sum(x^2) - (sum(x))^2 / N.
    weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
    weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
    weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
    weber_stats->satd = best_intra_cost;

    // max_scale is the largest |AC coefficient| (DC is excluded by zeroing
    // qcoeff[0] and starting at idx 1).
    qcoeff[0] = 0;
    int max_scale = 0;
    for (int idx = 1; idx < coeff_count; ++idx) {
      const int abs_qcoeff = abs(qcoeff[idx]);
      max_scale = AOMMAX(max_scale, abs_qcoeff);
    }
    weber_stats->max_scale = max_scale;

    // Signal completion of a sync unit so the row below may proceed.
    if ((mi_col + mb_step) % mt_unit_step == 0 ||
        (mi_col + mb_step) >= mi_cols) {
      intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
                                     mt_unit_col, mt_unit_cols);
      ++mt_unit_col;
    }
  }
  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
}
472 
// Single-threaded driver: computes Wiener-variance statistics for every
// 8x8 row of the frame using the encoder's own thread data and stack-local
// transform scratch buffers.
static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
                               double *sum_est_rate) {
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  const int row_step = mi_size_wide[cpi->weber_bsize];
  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
  int mi_row = 0;
  while (mi_row < cpi->frame_info.mi_rows) {
    av1_calc_mb_wiener_var_row(cpi, mb, mbd, mi_row, src_diff, coeff, qcoeff,
                               dqcoeff, sum_rec_distortion, sum_est_rate,
                               cpi->td.wiener_tmp_pred_buf);
    mi_row += row_step;
  }
}
489 
// Estimates the frame-level normalization factor as the weighted geometric
// mean of per-superblock perceptual variances, with each superblock
// weighted by SATD / sqrt(SSE). The result is clamped to at least 1.
static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
                                        const BLOCK_SIZE norm_block_size) {
  const AV1_COMMON *const cm = &cpi->common;
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
  const int step = mi_size_wide[norm_block_size];
  double weighted_log_sum = 0;
  double weight_sum = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += step) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += step) {
      const int var =
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
      // Texture-to-distortion ratio is the per-superblock weight.
      const double weight = (double)satd / sqrt((double)sse);
      weighted_log_sum += weight * log(var);
      weight_sum += weight;
    }
  }
  int64_t norm_factor = 1;
  if (weight_sum > 0)
    norm_factor = (int64_t)(exp(weighted_log_sum / weight_sum));
  norm_factor = AOMMAX(1, norm_factor);

  return norm_factor;
}
514 
// Disables several intra coding tools when the frame is being coded at very
// high quality (low qindex, high estimated bpp, low distortion per pixel),
// where the tools' gains do not justify their search cost. No-op unless
// auto_intra_tools_off is enabled.
static void automatic_intra_tools_off(AV1_COMP *cpi,
                                      const double sum_rec_distortion,
                                      const double sum_est_rate) {
  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;

  // High-quality thresholds.
  const int high_quality_qindex = 128;
  const double high_quality_bpp = 2.0;
  const double high_quality_dist_per_pix = 4.0;

  AV1_COMMON *const cm = &cpi->common;
  const int qindex = cm->quant_params.base_qindex;
  const double dist_per_pix =
      (double)sum_rec_distortion / (cm->width * cm->height);
  // The estimated bpp is coarse; the empirical constant 100 rescales it.
  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);

  const int is_high_quality = qindex < high_quality_qindex &&
                              estimate_bpp > high_quality_bpp &&
                              dist_per_pix < high_quality_dist_per_pix;
  if (is_high_quality) {
    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
  }
}
540 
// Reads the externally supplied rate distribution file (one float per 16x16
// block, row-major) into cpi->ext_rate_distribution, and derives
// cpi->ext_rate_scale: the factor that maps the external distribution onto
// the encoder's own uniform rate estimates. On open/parse failure the
// function asserts in debug builds and returns without updating the scale.
static void ext_rate_guided_quantization(AV1_COMP *cpi) {
  // Calculation uses 8x8.
  const int mb_step = mi_size_wide[cpi->weber_bsize];
  // Accumulate to 16x16, step size is in the unit of mi.
  const int block_step = 4;

  const char *filename = cpi->oxcf.rate_distribution_info;
  FILE *pfile = fopen(filename, "r");
  if (pfile == NULL) {
    assert(pfile != NULL);
    return;
  }

  double ext_rate_sum = 0.0;
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
      float val;
      const int fields_converted = fscanf(pfile, "%f", &val);
      if (fields_converted != 1) {
        assert(fields_converted == 1);
        fclose(pfile);
        return;
      }
      ext_rate_sum += val;
      cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
                                 (col / mb_step)] = val;
    }
  }
  fclose(pfile);

  // 64-bit accumulator: per-block estimates are pre-scaled by
  // 1 << AV1_PROB_COST_SHIFT, so a frame-wide 32-bit sum could overflow.
  int64_t uniform_rate_sum = 0;
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
      int rate_sum = 0;
      for (int r = 0; r < block_step; r += mb_step) {
        for (int c = 0; c < block_step; c += mb_step) {
          const int mi_row = row + r;
          const int mi_col = col + c;
          rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) *
                                                   cpi->frame_info.mi_cols +
                                               (mi_col / mb_step)];
        }
      }
      uniform_rate_sum += rate_sum;
    }
  }

  // Guard against a degenerate (all-zero or negative) external
  // distribution, which would otherwise produce a division by zero or a
  // meaningless scale; fall back to a neutral scale of 1.0.
  cpi->ext_rate_scale =
      (ext_rate_sum > 0.0) ? (double)uniform_rate_sum / ext_rate_sum : 1.0;
}
591 
// Top-level driver for the all-intra perceptual delta-q preprocessing:
// allocates frame and scratch buffers, computes per-8x8 Wiener-variance
// statistics (multi-threaded when workers are available and the
// distortion/rate accumulation is not needed), optionally disables intra
// tools and reads the external rate distribution, then iteratively refines
// cpi->norm_wiener_variance as a SATD-weighted geometric mean of
// per-superblock variances. Frees the temporary buffers before returning.
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq_params = cm->seq_params;
  if (aom_realloc_frame_buffer(
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
          seq_params->subsampling_y, seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
          NULL, cpi->alloc_pyramid, 0))
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");
  av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
  cpi->norm_wiener_variance = 0;

  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  // xd->mi needs to be setup since it is used in av1_frame_init_quantizer.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
  av1_frame_init_quantizer(cpi);

  double sum_rec_distortion = 0.0;
  double sum_est_rate = 0.0;

  MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int num_workers =
      AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
  AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  // Default to no-op sync hooks; the MT path installs the real ones below.
  intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
  intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
  // Calculate differential contrast for each block for the entire image.
  // TODO(chengchen): properly accumulate the distortion and rate in
  // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
  // auto_intra_tools_off is true.
  if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
    intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
    intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
    av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
                              &sum_est_rate);
  } else {
    calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
  }

  // Determine whether to turn off several intra coding tools.
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

  // Read external rate distribution and use it to guide delta quantization
  if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);

  const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
  cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
  const int norm_step = mi_size_wide[norm_block_size];

  double sb_wiener_log = 0;
  double sb_count = 0;
  // Two refinement passes: each pass recomputes the SATD-weighted geometric
  // mean using the normalization factor from the previous pass, with the
  // per-superblock boost (beta) clamped to [0.25, 4].
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
    sb_wiener_log = 0;
    sb_count = 0;
    for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
      for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
           mi_col += norm_step) {
        int sb_wiener_var =
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
        double min_max_scale = AOMMAX(
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));

        beta = AOMMIN(beta, 4);
        beta = AOMMAX(beta, 0.25);

        // Skip superblocks whose boost would exceed the allowed max scale.
        if (beta < 1 / min_max_scale) continue;

        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
        // Weight each superblock's log-variance by SATD / sqrt(SSE).
        double scaled_satd = (double)satd / sqrt((double)sse);
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
        sb_count += scaled_satd;
      }
    }

    if (sb_count > 0)
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
  }

  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
  aom_free_frame_buffer(&cm->cur_frame->buf);
  av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
}
687 
// Returns the delta-q adjusted qindex for the block at (mi_row, mi_col),
// guided by the externally supplied rate distribution: blocks whose scaled
// external ("hific") rate exceeds the encoder's uniform rate estimate get a
// lower qindex (more bits), and vice versa. The delta is clamped to
// +/- (delta_q_res * 10 - 1) and the result to [MINQ, MAXQ].
static int get_rate_guided_quantizer(const AV1_COMP *const cpi,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {
  // Calculation uses 8x8.
  const int mb_step = mi_size_wide[cpi->weber_bsize];
  // Accumulate to 16x16
  const int block_step = mi_size_wide[BLOCK_16X16];
  double sb_rate_hific = 0.0;
  double sb_rate_uniform = 0.0;
  // Rows span the block height and columns the block width. (The previous
  // code used mi_size_wide for rows and mi_size_high for columns; the swap
  // was harmless for square superblocks but wrong for non-square sizes.)
  for (int row = mi_row; row < mi_row + mi_size_high[bsize];
       row += block_step) {
    for (int col = mi_col; col < mi_col + mi_size_wide[bsize];
         col += block_step) {
      sb_rate_hific +=
          cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
                                     (col / mb_step)];

      for (int r = 0; r < block_step; r += mb_step) {
        for (int c = 0; c < block_step; c += mb_step) {
          const int this_row = row + r;
          const int this_col = col + c;
          sb_rate_uniform +=
              cpi->prep_rate_estimates[(this_row / mb_step) *
                                           cpi->frame_info.mi_cols +
                                       (this_col / mb_step)];
        }
      }
    }
  }
  sb_rate_hific *= cpi->ext_rate_scale;

  // Relative rate difference drives the quantization scale.
  const double weight = 1.0;
  const double rate_diff =
      weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform;
  double scale = pow(2, rate_diff);

  scale = scale * scale;
  // Do not boost beyond what the per-block max_scale statistics allow.
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale);

  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  int offset =
      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  const int max_offset = delta_q_info->delta_q_res * 10;
  offset = AOMMIN(offset, max_offset - 1);
  offset = AOMMAX(offset, -max_offset + 1);
  int qindex = cm->quant_params.base_qindex + offset;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ);
  // Keep at least one step above lossless when the base q is not lossless.
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}
742 
// Returns the qindex for a superblock under perceptual all-intra delta-q.
// Dispatches to the rate-guided quantizer when that mode is enabled,
// otherwise derives q from the ratio of the frame-level to superblock-level
// wiener variance.
int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                              int mi_row, int mi_col) {
  // Rate-guided delta-q takes precedence when enabled.
  if (cpi->oxcf.enable_rate_guide_deltaq)
    return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col);

  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  const int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);

  // Blocks with lower wiener variance than the frame norm get beta > 1
  // (finer quantization); higher-variance blocks get beta < 1.
  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
  const double min_max_scale =
      AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);

  // Clamp beta so the delta q stays reasonably close to the base q.
  beta = AOMMIN(AOMMAX(beta, 0.25), 4);

  int offset =
      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
  // Keep the offset strictly within +/- 20 delta-q resolution steps.
  const int max_offset = cm->delta_q_info.delta_q_res * 20;
  offset = AOMMIN(AOMMAX(offset, -max_offset + 1), max_offset - 1);

  int qindex = AOMMAX(AOMMIN(base_qindex + offset, MAXQ), MINQ);
  // Never turn a lossy frame's block lossless via a negative offset.
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}
771 
// Lazily allocates the per-macroblock delta-q buffer used by user-rating
// based quantization. Safe to call repeatedly; allocation happens once.
void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
  // Already allocated on an earlier call — nothing to do.
  if (cpi->mb_delta_q) return;

  AV1_COMMON *const cm = &cpi->common;
  const int num_mbs = cpi->frame_info.mb_rows * cpi->frame_info.mb_cols;
  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
                  aom_calloc(num_mbs, sizeof(*cpi->mb_delta_q)));
}
781 
782 #if CONFIG_TFLITE
// Runs the bundled TFLite delta-q model over every superblock of the source
// luma plane. For each (row, col) superblock the model's two outputs are
// written to predicts0/predicts1 (both sized num_rows * num_cols).
// Returns 0 on success, 1 on any TFLite or allocation failure.
//
// Fixes over the previous version:
//  - `options` was dereferenced (SetNumThreads) before its NULL check.
//  - `input_data` leaked on the Invoke-failure and NULL-output-tensor paths.
//  - The status of TfLiteInterpreterAllocateTensors was ignored.
//  - The output copy used the tensor-reported byte size into a fixed
//    float[2], which could overflow the stack buffer; the size is now
//    validated first.
static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
                         int bit_depth, uint8_t *y_buffer, int y_stride,
                         float *predicts0, float *predicts1) {
  int ret = 1;
  TfLiteInterpreterOptions *options = NULL;
  TfLiteInterpreter *interpreter = NULL;
  float *input_data = NULL;

  // Create the model.
  TfLiteModel *model =
      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
  if (model == NULL) return 1;

  // Create the interpreter options; check for NULL before first use.
  options = TfLiteInterpreterOptionsCreate();
  if (options == NULL) goto cleanup;
  TfLiteInterpreterOptionsSetNumThreads(options, 2);

  // Create the interpreter.
  interpreter = TfLiteInterpreterCreate(model, options);
  if (interpreter == NULL) goto cleanup;

  // Allocate tensors and locate the input tensor.
  if (TfLiteInterpreterAllocateTensors(interpreter) != kTfLiteOk) goto cleanup;
  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
  if (input_tensor == NULL) goto cleanup;

  const size_t input_size = TfLiteTensorByteSize(input_tensor);
  input_data = aom_calloc(input_size, 1);
  if (input_data == NULL) goto cleanup;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      // Mi units are 4 pixels wide; << 2 converts mi offsets to pixels.
      const int row_offset = (row * num_mi_h) << 2;
      const int col_offset = (col * num_mi_w) << 2;

      // Normalize the superblock's pixels to [0, 1] into the input tensor.
      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
      int r = row_offset, pos = 0;
      const float base = (float)((1 << bit_depth) - 1);
      while (r < row_offset + (num_mi_h << 2)) {
        for (int c = 0; c < (num_mi_w << 2); ++c) {
          input_data[pos++] = bit_depth > 8
                                  ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
                                  : (float)*(buf + c) / base;
        }
        buf += y_stride;
        ++r;
      }
      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);

      // Execute inference.
      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) goto cleanup;

      // Extract the output tensor data.
      const TfLiteTensor *output_tensor =
          TfLiteInterpreterGetOutputTensor(interpreter, 0);
      if (output_tensor == NULL) goto cleanup;

      float output_data[2];
      const size_t output_size = TfLiteTensorByteSize(output_tensor);
      // Reject an unexpected output shape rather than overflowing the
      // stack buffer.
      if (output_size != sizeof(output_data)) goto cleanup;

      TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
      predicts0[row * num_cols + col] = output_data[0];
      predicts1[row * num_cols + col] = output_data[1];
    }
  }
  ret = 0;

cleanup:
  // aom_free(NULL) is a no-op; the TFLite deleters are guarded because
  // their NULL behavior is not documented.
  aom_free(input_data);
  if (interpreter != NULL) TfLiteInterpreterDelete(interpreter);
  if (options != NULL) TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
  return ret;
}
880 
// Fills cpi->mb_delta_q with per-superblock delta-q values predicted by the
// TFLite user-rating model, centered on the frame average and scaled by the
// configured cq_level and deltaq_strength.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;
  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;

  // Superblock grid dimensions (rounded up to cover partial edge blocks).
  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // TODO(sdeng): fit a better model_1; disable it at this time.
  float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f;
  CHECK_MEM_ERROR(cm, mb_delta_q0,
                  aom_calloc(num_rows * num_cols, sizeof(float)));
  CHECK_MEM_ERROR(cm, mb_delta_q1,
                  aom_calloc(num_rows * num_cols, sizeof(float)));

  if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
                    y_stride, mb_delta_q0, mb_delta_q1)) {
    // aom_internal_error() longjmps out of this function; release the
    // scratch buffers first so they are not leaked.
    aom_free(mb_delta_q0);
    aom_free(mb_delta_q1);
    aom_internal_error(cm->error, AOM_CODEC_ERROR,
                       "Failed to call TFlite functions.");
  }

  // Average the model-0 predictions over all superblocks.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      delta_q_avg0 += mb_delta_q0[index];
    }
  }

  delta_q_avg0 /= (float)(num_rows * num_cols);

  // Scale predictions by how the configured cq_level relates to the frame
  // average, so the average delta q lands near the requested quality.
  // NOTE(review): the second branch divides by (1 - delta_q_avg0); presumably
  // the model output never reaches exactly 1.0 — confirm against the model.
  float scaling_factor;
  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
  if (cq_level < delta_q_avg0) {
    scaling_factor = cq_level / delta_q_avg0;
  } else {
    scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0);
  }

  // Final per-superblock delta q: mean-centered, strength- and scale-adjusted.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      cpi->mb_delta_q[index] =
          RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
               scaling_factor * (mb_delta_q0[index] - delta_q_avg0));
    }
  }

  aom_free(mb_delta_q0);
  aom_free(mb_delta_q1);
}
937 #else  // !CONFIG_TFLITE
// Fills cpi->mb_delta_q with per-superblock delta-q values derived from the
// log-average 8x8 variance of each superblock, mapped through two
// exponential models fitted to user-labeled quality ratings, and then
// interpolated/scaled according to the configured cq_level.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  // Superblock grid dimensions (rounded up to cover partial edge blocks).
  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // Two candidate delta-q predictions per superblock, one per fitted model.
  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between current version (Sept 2021) and the
  // baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  // delta_q = a * exp(-b * variance) + c, per model.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block (2 mi units = 8 pixels per step),
      // accumulating log variance so the per-SB value is a geometric mean.
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          // Convert mi coordinates to pixel offsets (4 pixels per mi unit).
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          // Clamp to 1 so log() is well-defined for flat blocks.
          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // Geometric mean of the 8x8 variances for this superblock.
      var = exp(var / num_of_var);
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  // Frame-level averages of each model's predictions.
  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Pick a model (or interpolate between them, model_idx == 2) depending on
  // where the configured cq_level falls relative to the two averages.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  // Average of the interpolated prediction, used for mean-centering below.
  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        // Interpolate between the two models, then mean-center and apply
        // the user-configured delta-q strength (percent).
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        // Single-model path: mean-center, then scale by both the cq-level
        // scaling factor and the configured strength.
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
1043 #endif
1044 
av1_get_sbq_user_rating_based(const AV1_COMP * const cpi,int mi_row,int mi_col)1045 int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row,
1046                                   int mi_col) {
1047   const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
1048   const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
1049   const AV1_COMMON *const cm = &cpi->common;
1050   const int base_qindex = cm->quant_params.base_qindex;
1051   if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;
1052 
1053   const int num_mi_w = mi_size_wide[bsize];
1054   const int num_mi_h = mi_size_high[bsize];
1055   const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
1056   const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
1057   const int delta_q = cpi->mb_delta_q[index];
1058 
1059   int qindex = base_qindex + delta_q;
1060   qindex = AOMMIN(qindex, MAXQ);
1061   qindex = AOMMAX(qindex, MINQ + 1);
1062 
1063   return qindex;
1064 }
1065 
1066 #if !CONFIG_REALTIME_ONLY
1067 
1068 // Variance Boost: a variance adaptive quantization implementation
1069 // SVT-AV1 appendix with an overview and a graphical, step-by-step explanation
1070 // of the implementation
1071 // https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md
av1_get_sbq_variance_boost(const AV1_COMP * cpi,const MACROBLOCK * x)1072 int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) {
1073   const AV1_COMMON *cm = &cpi->common;
1074   const int base_qindex = cm->quant_params.base_qindex;
1075   const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
1076 
1077   // Variance Boost only supports 64x64 SBs.
1078   assert(cm->seq_params->sb_size == BLOCK_64X64);
1079 
1080   // Strength is currently hard-coded and optimized for still pictures. In the
1081   // future, we might want to expose this as a parameter that can be fine-tuned
1082   // by the caller.
1083   const int strength = 3;
1084   unsigned int variance = av1_get_variance_boost_block_variance(cpi, x);
1085 
1086   // Variance = 0 areas are either completely flat patches or have very fine
1087   // gradients. Boost these blocks as if they have a variance of 1.
1088   if (variance == 0) {
1089     variance = 1;
1090   }
1091 
1092   // Compute a boost based on a fast-growing formula.
1093   // High and medium variance SBs essentially get no boost, while lower variance
1094   // SBs get increasingly stronger boosts.
1095   assert(strength >= 1 && strength <= 4);
1096 
1097   // Still picture curve, with variance crossover point at 1024.
1098   double qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10.0) + 1.0;
1099   qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST);
1100 
1101   double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth);
1102   double target_q = base_q / qstep_ratio;
1103   int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth);
1104 
1105   // Determine the SB's delta_q boost by computing an (unscaled) delta_q from
1106   // the base and target q values, then scale that delta_q according to the
1107   // frame's base qindex.
1108   // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2
1109   // scores, 10th percentile scores, and subjective quality. Boosts become
1110   // smaller (for a given variance) the lower the base qindex.
1111   int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) /
1112                          1279.0);
1113   boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost);
1114 
1115   // Variance Boost was designed to always operate in the lossy domain, so MINQ
1116   // is excluded.
1117   int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1);
1118 
1119   return sb_qindex;
1120 }
1121 #endif
1122