/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>

#include "config/aom_config.h"

#include "aom_util/aom_pthread.h"

#if CONFIG_TFLITE
#include "tensorflow/lite/c/c_api.h"
#include "av1/encoder/deltaq4_model.c"
#endif

#include "av1/common/common_data.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/rdopt_utils.h"

#define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128
#define MB_WIENER_PRED_BUF_STRIDE 128

// Maximum delta-q range allowed for Variance Boost after scaling
#define VAR_BOOST_MAX_DELTAQ_RANGE 80
// Maximum quantization step boost allowed for Variance Boost
#define VAR_BOOST_MAX_BOOST 8.0

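// Allocates the temporary intra prediction buffer used while collecting
// per-block Wiener-variance statistics. The buffer is twice as large for
// high bit depth content.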
void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) {
  const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd);
  assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL);
  const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE];
  const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE];
  assert(buf_width == MB_WIENER_PRED_BUF_STRIDE);
  const size_t buf_size =
      (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf))
      << is_high_bitdepth;
  CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size));
}

void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) {
  aom_free(td->wiener_tmp_pred_buf);
  td->wiener_tmp_pred_buf = NULL;
}

void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;

  // This block size is also used to determine number of workers in
  // multi-threading. If it is changed, one needs to change it accordingly in
  // "compute_num_ai_workers()".
  cpi->weber_bsize = BLOCK_8X8;

  if (cpi->oxcf.enable_rate_guide_deltaq) {
    if (cpi->mb_weber_stats && cpi->prep_rate_estimates &&
        cpi->ext_rate_distribution)
      return;
  } else {
    if (cpi->mb_weber_stats) return;
  }

  CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
                  aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
                             sizeof(*cpi->mb_weber_stats)));

  if (cpi->oxcf.enable_rate_guide_deltaq) {
    CHECK_MEM_ERROR(
        cm, cpi->prep_rate_estimates,
        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
                   sizeof(*cpi->prep_rate_estimates)));

    CHECK_MEM_ERROR(
        cm, cpi->ext_rate_distribution,
        aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
                   sizeof(*cpi->ext_rate_distribution)));
  }
}

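// Returns the average intra SATD of the weber_bsize units covered by the
// block at (mi_row, mi_col), clamped to a minimum of 1.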
static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                        int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t satd = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
                  .satd;
      ++mb_count;
    }
  }

  if (mb_count) satd = (int)(satd / mb_count);
  satd = AOMMAX(1, satd);

  return (int)satd;
}

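// Returns the average prediction distortion (SSE) of the weber_bsize units
// covered by the block at (mi_row, mi_col), clamped to a minimum of 1.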
static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                       int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t distortion = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      distortion +=
          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
              .distortion;
      ++mb_count;
    }
  }

  if (mb_count) distortion = (int)(distortion / mb_count);
  distortion = AOMMAX(1, distortion);

  return (int)distortion;
}

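// Returns the smallest max_scale (largest quantized AC coefficient magnitude)
// among the covered weber_bsize units, considering only units whose max_scale
// is at least 1.0. Defaults to 10.0 when no unit qualifies.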
static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  double min_max_scale = 10.0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;
      const WeberStats *weber_stats =
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
      if (weber_stats->max_scale < 1.0) continue;
      if (weber_stats->max_scale < min_max_scale)
        min_max_scale = weber_stats->max_scale;
    }
  }
  return min_max_scale;
}

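// Estimates the Wiener variance of the given window by aggregating the
// distortion, source/reconstruction variances and peak pixel values of the
// covered weber_bsize units, with a small regularization term.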
static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int sb_wiener_var = 0;
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  double base_num = 1;
  double base_den = 1;
  double base_reg = 1;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      const WeberStats *weber_stats =
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];

      base_num += ((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_variance) *
                  weber_stats->rec_pix_max;

      base_den += fabs(
          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));

      base_reg += sqrt((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_pix_max) * 0.1;
      ++mb_count;
    }
  }

  sb_wiener_var =
      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
  sb_wiener_var = AOMMAX(1, sb_wiener_var);

  return (int)sb_wiener_var;
}

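// Returns the minimum windowed Wiener variance over the block itself and its
// half-block-shifted neighbors (above, below, left and right), when available.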
static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);

  if (mi_row >= (mi_high / 2)) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
  }
  if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
  }
  if (mi_col >= (mi_wide / 2)) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
  }
  if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
  }

  return sb_wiener_var;
}

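// Returns a crude rate estimate, scaled by AV1_PROB_COST_SHIFT, for the
// quantized coefficients: roughly log2(1 + |level|) plus a sign flag per
// scanned coefficient.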
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];

  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  int rate_cost = 1;

  for (int idx = 0; idx < eob; ++idx) {
    int abs_level = abs(qcoeff[scan_order->scan[idx]]);
    rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0);
  }

  return (rate_cost << AV1_PROB_COST_SHIFT);
}

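// Computes the Wiener-variance statistics (and, when rate guided delta-q is
// enabled, the per-block rate estimates) for one row of weber_bsize blocks.
// Each block is intra predicted, transformed, quantized and reconstructed,
// and the resulting source/reconstruction statistics are stored in
// cpi->mb_weber_stats.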
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
                                MACROBLOCKD *xd, const int mi_row,
                                int16_t *src_diff, tran_low_t *coeff,
                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
                                double *sum_rec_distortion,
                                double *sum_est_rate, uint8_t *pred_buffer) {
  AV1_COMMON *const cm = &cpi->common;
  uint8_t *buffer = cpi->source->y_buffer;
  int buf_stride = cpi->source->y_stride;
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int block_size = tx_size_wide[tx_size];
  const int coeff_count = block_size * block_size;
  const int mb_step = mi_size_wide[bsize];
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
  const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  AV1EncRowMultiThreadSync *const intra_row_mt_sync =
      &cpi->ppi->intra_row_mt_sync;
  const int mi_cols = cm->mi_params.mi_cols;
  const int mt_thread_id = mi_row / mb_step;
  // TODO(chengchen): test different unit step size
  const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
  const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
  int mt_unit_col = 0;
  const int is_high_bitdepth = is_cur_buf_hbd(xd);

  uint8_t *dst_buffer = pred_buffer;
  const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;

  if (is_high_bitdepth) {
    uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
    dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
  }

  for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
    if (mi_col % mt_unit_step == 0) {
      intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
                                    mt_unit_col);
#if CONFIG_MULTITHREAD
      const int num_workers =
          AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
      if (num_workers > 1) {
        const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
        pthread_mutex_lock(enc_row_mt->mutex_);
        const bool exit = enc_row_mt->mb_wiener_mt_exit;
        pthread_mutex_unlock(enc_row_mt->mutex_);
        // Stop further processing in case any worker has encountered an error.
        if (exit) break;
      }
#endif
    }

    PREDICTION_MODE best_mode = DC_PRED;
    int best_intra_cost = INT_MAX;
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                          mi_row, mi_col);
    set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                   AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
                   AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
    set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                 av1_num_planes(cm));
    xd->mi[0]->bsize = bsize;
    xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
    // Set above and left mbmi to NULL as they are not available in the
    // preprocessing stage.
    // They are used to determine intra edge filter types in intra prediction.
    if (xd->up_available) {
      xd->above_mbmi = NULL;
    }
    if (xd->left_available) {
      xd->left_mbmi = NULL;
    }
    uint8_t *mb_buffer =
        buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
    for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
         ++mode) {
      // TODO(chengchen): Here we use src instead of the reconstructed frame as
      // the intra predictor to make the single- and multi-threaded versions
      // match. Ideally we want to use the reconstructed frame.
      av1_predict_intra_block(
          xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
          block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
          mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
      int intra_cost = aom_satd(coeff, coeff_count);
      if (intra_cost < best_intra_cost) {
        best_intra_cost = intra_cost;
        best_mode = mode;
      }
    }

    av1_predict_intra_block(
        xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
        block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
        mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
    av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
    av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

    const struct macroblock_plane *const p = &x->plane[0];
    uint16_t eob;
    const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
    QUANT_PARAM quant_param;
    int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
    av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd)) {
      av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                    scan_order, &quant_param);
    } else {
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                             scan_order, &quant_param);
    }
#else
    av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
                           &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH

    if (cpi->oxcf.enable_rate_guide_deltaq) {
      const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
      cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                               (mi_col / mb_step)] = rate_cost;
    }

    av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                                dst_buffer_stride, eob, 0);
    WeberStats *weber_stats =
        &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                             (mi_col / mb_step)];

    weber_stats->rec_pix_max = 1;
    weber_stats->rec_variance = 0;
    weber_stats->src_pix_max = 1;
    weber_stats->src_variance = 0;
    weber_stats->distortion = 0;

    int64_t src_mean = 0;
    int64_t rec_mean = 0;
    int64_t dist_mean = 0;

    for (int pix_row = 0; pix_row < block_size; ++pix_row) {
      for (int pix_col = 0; pix_col < block_size; ++pix_col) {
        int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
        if (is_cur_buf_hbd(xd)) {
          uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
          uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
          src_pix = src[pix_row * buf_stride + pix_col];
          rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
        } else {
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
        }
#else
        src_pix = mb_buffer[pix_row * buf_stride + pix_col];
        rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
        src_mean += src_pix;
        rec_mean += rec_pix;
        dist_mean += src_pix - rec_pix;
        weber_stats->src_variance += src_pix * src_pix;
        weber_stats->rec_variance += rec_pix * rec_pix;
        weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
        weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
        weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
      }
    }

    if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
      *sum_rec_distortion += weber_stats->distortion;
      int est_block_rate = 0;
      int64_t est_block_dist = 0;
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                      pix_num, &est_block_rate,
                                      &est_block_dist);
      *sum_est_rate += est_block_rate;
    }

    weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
    weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
    weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
    weber_stats->satd = best_intra_cost;

    qcoeff[0] = 0;
    int max_scale = 0;
    for (int idx = 1; idx < coeff_count; ++idx) {
      const int abs_qcoeff = abs(qcoeff[idx]);
      max_scale = AOMMAX(max_scale, abs_qcoeff);
    }
    weber_stats->max_scale = max_scale;

    if ((mi_col + mb_step) % mt_unit_step == 0 ||
        (mi_col + mb_step) >= mi_cols) {
      intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
                                     mt_unit_col, mt_unit_cols);
      ++mt_unit_col;
    }
  }
  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
}

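// Single-threaded wrapper that computes the Wiener-variance statistics for
// every block row of the frame.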
static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
                               double *sum_est_rate) {
  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const int mb_step = mi_size_wide[bsize];
  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
    av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
                               dqcoeff, sum_rec_distortion, sum_est_rate,
                               cpi->td.wiener_tmp_pred_buf);
  }
}

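// Returns a frame-level normalization factor: a SATD-weighted geometric mean
// of the per-block Wiener variances at norm_block_size granularity.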
static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
                                        const BLOCK_SIZE norm_block_size) {
  const AV1_COMMON *const cm = &cpi->common;
  int64_t norm_factor = 1;
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
  const int norm_step = mi_size_wide[norm_block_size];
  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
      const int sb_wiener_var =
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
      const double scaled_satd = (double)satd / sqrt((double)sse);
      sb_wiener_log += scaled_satd * log(sb_wiener_var);
      sb_count += scaled_satd;
    }
  }
  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
  norm_factor = AOMMAX(1, norm_factor);

  return norm_factor;
}

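// Turns off several intra coding tools (smooth, Paeth, CfL and diagonal
// modes) when the frame is estimated to be encoded at high quality with low
// distortion.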
static void automatic_intra_tools_off(AV1_COMP *cpi,
                                      const double sum_rec_distortion,
                                      const double sum_est_rate) {
  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;

  // Thresholds
  const int high_quality_qindex = 128;
  const double high_quality_bpp = 2.0;
  const double high_quality_dist_per_pix = 4.0;

  AV1_COMMON *const cm = &cpi->common;
  const int qindex = cm->quant_params.base_qindex;
  const double dist_per_pix =
      (double)sum_rec_distortion / (cm->width * cm->height);
  // The estimated bpp is not accurate, so an empirical constant of 100 is used
  // as a divisor.
  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);

  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
      dist_per_pix < high_quality_dist_per_pix) {
    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
  }
}

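// Reads the externally provided rate distribution from
// cpi->oxcf.rate_distribution_info and derives the scale factor between the
// encoder's own uniform-q rate estimates and the external rates.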
static void ext_rate_guided_quantization(AV1_COMP *cpi) {
  // Calculation uses 8x8.
  const int mb_step = mi_size_wide[cpi->weber_bsize];
  // Accumulate to 16x16, step size is in the unit of mi.
  const int block_step = 4;

  const char *filename = cpi->oxcf.rate_distribution_info;
  FILE *pfile = fopen(filename, "r");
  if (pfile == NULL) {
    assert(pfile != NULL);
    return;
  }

  double ext_rate_sum = 0.0;
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
      float val;
      const int fields_converted = fscanf(pfile, "%f", &val);
      if (fields_converted != 1) {
        assert(fields_converted == 1);
        fclose(pfile);
        return;
      }
      ext_rate_sum += val;
      cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
                                 (col / mb_step)] = val;
    }
  }
  fclose(pfile);

  int uniform_rate_sum = 0;
  for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) {
    for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) {
      int rate_sum = 0;
      for (int r = 0; r < block_step; r += mb_step) {
        for (int c = 0; c < block_step; c += mb_step) {
          const int mi_row = row + r;
          const int mi_col = col + c;
          rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) *
                                                   cpi->frame_info.mi_cols +
                                               (mi_col / mb_step)];
        }
      }
      uniform_rate_sum += rate_sum;
    }
  }

  const double scale = uniform_rate_sum / ext_rate_sum;
  cpi->ext_rate_scale = scale;
}

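// Entry point for the perceptual AI mode: computes the per-block Wiener
// variance statistics for the frame (single- or multi-threaded) and derives
// the frame-level normalized Wiener variance used to assign delta-q.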
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq_params = cm->seq_params;
  if (aom_realloc_frame_buffer(
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
          seq_params->subsampling_y, seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
          NULL, cpi->alloc_pyramid, 0))
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");
  av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
  cpi->norm_wiener_variance = 0;

  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  // xd->mi needs to be set up since it is used in av1_frame_init_quantizer.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
  av1_frame_init_quantizer(cpi);

  double sum_rec_distortion = 0.0;
  double sum_est_rate = 0.0;

  MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int num_workers =
      AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
  AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
  intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
  // Calculate differential contrast for each block for the entire image.
  // TODO(chengchen): properly accumulate the distortion and rate in
  // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
  // auto_intra_tools_off is true.
  if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
    intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
    intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
    av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
                              &sum_est_rate);
  } else {
    calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
  }

  // Determine whether to turn off several intra coding tools.
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

  // Read the external rate distribution and use it to guide delta
  // quantization.
  if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);

  const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
  cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
  const int norm_step = mi_size_wide[norm_block_size];

  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
    sb_wiener_log = 0;
    sb_count = 0;
    for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
      for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
           mi_col += norm_step) {
        int sb_wiener_var =
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
        double min_max_scale = AOMMAX(
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));

        beta = AOMMIN(beta, 4);
        beta = AOMMAX(beta, 0.25);

        if (beta < 1 / min_max_scale) continue;

        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
        double scaled_satd = (double)satd / sqrt((double)sse);
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
        sb_count += scaled_satd;
      }
    }

    if (sb_count > 0)
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
  }

  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
  aom_free_frame_buffer(&cm->cur_frame->buf);
  av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
}

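// Maps the ratio between the externally guided rate and the uniform-q rate
// estimate of a superblock to a per-superblock qindex, with the resulting
// offset clamped around the base qindex.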
static int get_rate_guided_quantizer(const AV1_COMP *const cpi,
                                     BLOCK_SIZE bsize, int mi_row,
                                     int mi_col) {
  // Calculation uses 8x8.
  const int mb_step = mi_size_wide[cpi->weber_bsize];
  // Accumulate to 16x16
  const int block_step = mi_size_wide[BLOCK_16X16];
  double sb_rate_hific = 0.0;
  double sb_rate_uniform = 0.0;
  for (int row = mi_row; row < mi_row + mi_size_wide[bsize];
       row += block_step) {
    for (int col = mi_col; col < mi_col + mi_size_high[bsize];
         col += block_step) {
      sb_rate_hific +=
          cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols +
                                     (col / mb_step)];

      for (int r = 0; r < block_step; r += mb_step) {
        for (int c = 0; c < block_step; c += mb_step) {
          const int this_row = row + r;
          const int this_col = col + c;
          sb_rate_uniform +=
              cpi->prep_rate_estimates[(this_row / mb_step) *
                                           cpi->frame_info.mi_cols +
                                       (this_col / mb_step)];
        }
      }
    }
  }
  sb_rate_hific *= cpi->ext_rate_scale;

  const double weight = 1.0;
  const double rate_diff =
      weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform;
  double scale = pow(2, rate_diff);

  scale = scale * scale;
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale);

  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  int offset =
      av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  const int max_offset = delta_q_info->delta_q_res * 10;
  offset = AOMMIN(offset, max_offset - 1);
  offset = AOMMAX(offset, -max_offset + 1);
  int qindex = cm->quant_params.base_qindex + offset;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ);
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}

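// Returns the superblock qindex for the perceptual AI mode: either the rate
// guided quantizer, or an offset from the base qindex derived from the
// block's Wiener variance relative to the frame-level norm.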
int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                              int mi_row, int mi_col) {
  if (cpi->oxcf.enable_rate_guide_deltaq) {
    return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col);
  }

  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
  int offset = 0;
  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);

  // Cap beta so that the delta q value is not too far away from the base q.
  beta = AOMMIN(beta, 4);
  beta = AOMMAX(beta, 0.25);
  offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
  offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);
  int qindex = cm->quant_params.base_qindex + offset;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ);
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}

void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;

  if (cpi->mb_delta_q) return;

  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
                  aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols,
                             sizeof(*cpi->mb_delta_q)));
}

#if CONFIG_TFLITE
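// Runs the bundled TFLite delta-q model on each block of the source luma
// plane and writes the two model outputs to predicts0/predicts1. Returns 0 on
// success, 1 on failure.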
static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
                         int bit_depth, uint8_t *y_buffer, int y_stride,
                         float *predicts0, float *predicts1) {
  // Create the model and interpreter options.
  TfLiteModel *model =
      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
  if (model == NULL) return 1;

  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
  if (options == NULL) {
    TfLiteModelDelete(model);
    return 1;
  }
  TfLiteInterpreterOptionsSetNumThreads(options, 2);

  // Create the interpreter.
  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
  if (interpreter == NULL) {
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  // Allocate tensors and populate the input tensor data.
  TfLiteInterpreterAllocateTensors(interpreter);
  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
  if (input_tensor == NULL) {
    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  size_t input_size = TfLiteTensorByteSize(input_tensor);
  float *input_data = aom_calloc(input_size, 1);
  if (input_data == NULL) {
    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset = (row * num_mi_h) << 2;
      const int col_offset = (col * num_mi_w) << 2;

      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
      int r = row_offset, pos = 0;
      const float base = (float)((1 << bit_depth) - 1);
      while (r < row_offset + (num_mi_h << 2)) {
        for (int c = 0; c < (num_mi_w << 2); ++c) {
          input_data[pos++] = bit_depth > 8
                                  ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
                                  : (float)*(buf + c) / base;
        }
        buf += y_stride;
        ++r;
      }
      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);

      // Execute inference.
      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) {
        TfLiteInterpreterDelete(interpreter);
        TfLiteInterpreterOptionsDelete(options);
        TfLiteModelDelete(model);
        return 1;
      }

      // Extract the output tensor data.
      const TfLiteTensor *output_tensor =
          TfLiteInterpreterGetOutputTensor(interpreter, 0);
      if (output_tensor == NULL) {
        TfLiteInterpreterDelete(interpreter);
        TfLiteInterpreterOptionsDelete(options);
        TfLiteModelDelete(model);
        return 1;
      }

      size_t output_size = TfLiteTensorByteSize(output_tensor);
      float output_data[2];

      TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
      predicts0[row * num_cols + col] = output_data[0];
      predicts1[row * num_cols + col] = output_data[1];
    }
  }

  // Dispose of the model and interpreter objects.
  TfLiteInterpreterDelete(interpreter);
  TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
  aom_free(input_data);
  return 0;
}

void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;
  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // TODO(sdeng): fit a better model_1; disable it at this time.
  float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f;
  CHECK_MEM_ERROR(cm, mb_delta_q0,
                  aom_calloc(num_rows * num_cols, sizeof(float)));
  CHECK_MEM_ERROR(cm, mb_delta_q1,
                  aom_calloc(num_rows * num_cols, sizeof(float)));

  if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
                    y_stride, mb_delta_q0, mb_delta_q1)) {
    aom_internal_error(cm->error, AOM_CODEC_ERROR,
                       "Failed to call TFlite functions.");
  }

  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      delta_q_avg0 += mb_delta_q0[index];
    }
  }

  delta_q_avg0 /= (float)(num_rows * num_cols);

  float scaling_factor;
  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
  if (cq_level < delta_q_avg0) {
    scaling_factor = cq_level / delta_q_avg0;
  } else {
    scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0);
  }

  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      cpi->mb_delta_q[index] =
          RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
               scaling_factor * (mb_delta_q0[index] - delta_q_avg0));
    }
  }

  aom_free(mb_delta_q0);
  aom_free(mb_delta_q1);
}
#else  // !CONFIG_TFLITE
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between the current version (Sept 2021) and
  // the baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block.
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      var = exp(var / num_of_var);
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
#endif  // CONFIG_TFLITE

int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row,
                                  int mi_col) {
  const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
  const AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;

  const int num_mi_w = mi_size_wide[bsize];
  const int num_mi_h = mi_size_high[bsize];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
  const int delta_q = cpi->mb_delta_q[index];

  int qindex = base_qindex + delta_q;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}

#if !CONFIG_REALTIME_ONLY

// Variance Boost: a variance adaptive quantization implementation.
// The SVT-AV1 appendix gives an overview and a graphical, step-by-step
// explanation of the implementation:
// https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md
int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) {
  const AV1_COMMON *cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;

  // Variance Boost only supports 64x64 SBs.
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  // Strength is currently hard-coded and optimized for still pictures. In the
  // future, we might want to expose this as a parameter that can be fine-tuned
  // by the caller.
  const int strength = 3;
  unsigned int variance = av1_get_variance_boost_block_variance(cpi, x);

  // Variance = 0 areas are either completely flat patches or have very fine
  // gradients. Boost these blocks as if they have a variance of 1.
  if (variance == 0) {
    variance = 1;
  }

  // Compute a boost based on a fast-growing formula.
  // High and medium variance SBs essentially get no boost, while lower
  // variance SBs get increasingly stronger boosts.
  assert(strength >= 1 && strength <= 4);

  // Still picture curve, with variance crossover point at 1024.
  double qstep_ratio =
      0.15 * strength * (-log2((double)variance) + 10.0) + 1.0;
  qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST);

  double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth);
  double target_q = base_q / qstep_ratio;
  int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth);

  // Determine the SB's delta_q boost by computing an (unscaled) delta_q from
  // the base and target q values, then scale that delta_q according to the
  // frame's base qindex.
  // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2
  // scores, 10th percentile scores, and subjective quality. Boosts become
  // smaller (for a given variance) the lower the base qindex.
  int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) /
                         1279.0);
  boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost);

  // Variance Boost was designed to always operate in the lossy domain, so MINQ
  // is excluded.
  int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1);

  return sb_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY