/*
 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "config/aom_config.h"

#if CONFIG_TFLITE
#include "tensorflow/lite/c/c_api.h"
#include "av1/encoder/deltaq4_model.c"
#endif

#include "av1/common/common_data.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/rdopt_utils.h"

void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;

  // This block size is also used to determine the number of workers in
  // multi-threading. If it is changed, "compute_num_ai_workers()" must be
  // updated accordingly.
  cpi->weber_bsize = BLOCK_8X8;

  if (cpi->mb_weber_stats) return;

  CHECK_MEM_ERROR(cm, cpi->mb_weber_stats,
                  aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols,
                             sizeof(*cpi->mb_weber_stats)));
}

static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                        int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t satd = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
                  .satd;
      ++mb_count;
    }
  }

  // Return the average SATD of the in-frame 8x8 blocks, floored at 1.
  if (mb_count) satd /= mb_count;
  satd = AOMMAX(1, satd);

  return satd;
}

static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                       int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  int64_t distortion = 0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      distortion +=
          cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]
              .distortion;
      ++mb_count;
    }
  }

  // Return the average distortion of the in-frame 8x8 blocks, floored at 1.
  if (mb_count) distortion /= mb_count;
  distortion = AOMMAX(1, distortion);

  return distortion;
}

static double get_max_scale(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                            int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int mb_stride = cpi->frame_info.mi_cols;
  double min_max_scale = 10.0;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;
      WeberStats *weber_stats =
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];
      if (weber_stats->max_scale < 1.0) continue;
      if (weber_stats->max_scale < min_max_scale)
        min_max_scale = weber_stats->max_scale;
    }
  }
  return min_max_scale;
}

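// Estimates the "Wiener variance" of the window whose top-left corner is at
// (mi_row, mi_col). Roughly, for each in-frame 8x8 block the numerator
// accumulates distortion * sqrt(src_variance) * rec_pix_max, the denominator
// accumulates the Weber-contrast-style mismatch
//   |rec_pix_max * sqrt(src_variance) - src_pix_max * sqrt(rec_variance)|,
// and base_reg is a regularization term that keeps the ratio stable when
// either side is near zero. The result is the per-block average
//   ((base_num + base_reg) / (base_den + base_reg)) / mb_count,
// floored at 1.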
static int get_window_wiener_var(AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  const int mi_step = mi_size_wide[cpi->weber_bsize];
  int sb_wiener_var = 0;
  int mb_stride = cpi->frame_info.mi_cols;
  int mb_count = 0;
  double base_num = 1;
  double base_den = 1;
  double base_reg = 1;

  for (int row = mi_row; row < mi_row + mi_high; row += mi_step) {
    for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) {
      if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols)
        continue;

      WeberStats *weber_stats =
          &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)];

      base_num += ((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_variance) *
                  weber_stats->rec_pix_max;

      base_den += fabs(
          weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) -
          weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance));

      base_reg += sqrt((double)weber_stats->distortion) *
                  sqrt((double)weber_stats->src_pix_max) * 0.1;
      ++mb_count;
    }
  }

  sb_wiener_var =
      (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count);
  sb_wiener_var = AOMMAX(1, sb_wiener_var);

  return sb_wiener_var;
}

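// Returns the minimum window Wiener variance over the window anchored at
// (mi_row, mi_col) and its four half-block-shifted neighbors (up, down, left,
// right), so a block bordering a flat region inherits the flat region's low
// variance and receives the more conservative estimate.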
static int get_var_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col);

  if (mi_row >= (mi_high / 2)) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col));
  }
  if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col));
  }
  if (mi_col >= (mi_wide / 2)) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2));
  }
  if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) {
    sb_wiener_var =
        AOMMIN(sb_wiener_var,
               get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2));
  }

  return sb_wiener_var;
}

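// Computes Weber statistics for one row of 8x8 blocks. For each block: find
// the intra prediction mode with the lowest SATD cost, run the transform /
// quantize / dequantize / inverse-transform pipeline for that mode, and
// record source and reconstruction statistics (variance, pixel maxima,
// distortion, SATD, and the largest quantized AC coefficient magnitude as
// max_scale). Rows synchronize through enc_row_mt at a 64x64 unit
// granularity so the row-based multi-threading path can reuse this function.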
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
                                MACROBLOCKD *xd, const int mi_row,
                                int16_t *src_diff, tran_low_t *coeff,
                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
                                double *sum_rec_distortion,
                                double *sum_est_rate) {
  AV1_COMMON *const cm = &cpi->common;
  uint8_t *buffer = cpi->source->y_buffer;
  int buf_stride = cpi->source->y_stride;
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int block_size = tx_size_wide[tx_size];
  const int coeff_count = block_size * block_size;
  const int mb_step = mi_size_wide[bsize];
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  const AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
  // We allocate cpi->tile_data (of size 1) when this function is called in
  // multi-threaded mode, so cpi->tile_data may be a null pointer when it is
  // called in single-threaded mode.
  AV1EncRowMultiThreadSync *const row_mt_sync =
      cpi->tile_data ? &cpi->tile_data[0].row_mt_sync : NULL;
  const int mi_cols = cm->mi_params.mi_cols;
  const int mt_thread_id = mi_row / mb_step;
  // TODO(chengchen): test different unit step size
  const int mt_unit_step = mi_size_wide[BLOCK_64X64];
  const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
  int mt_unit_col = 0;

  for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
    if (mi_col % mt_unit_step == 0) {
      enc_row_mt->sync_read_ptr(row_mt_sync, mt_thread_id, mt_unit_col);
    }

    PREDICTION_MODE best_mode = DC_PRED;
    int best_intra_cost = INT_MAX;
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                          mi_row, mi_col);
    set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                   cm->mi_params.mi_rows, cm->mi_params.mi_cols);
    set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                 av1_num_planes(cm));
    xd->mi[0]->bsize = bsize;
    xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
    av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col,
                         0, av1_num_planes(cm));
    int dst_buffer_stride = xd->plane[0].dst.stride;
    uint8_t *dst_buffer = xd->plane[0].dst.buf;
    uint8_t *mb_buffer =
        buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
    // Search all intra modes for the one with the lowest SATD cost.
    for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
         ++mode) {
      av1_predict_intra_block(xd, cm->seq_params->sb_size,
                              cm->seq_params->enable_intra_edge_filter,
                              block_size, block_size, tx_size, mode, 0, 0,
                              FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride,
                              dst_buffer, dst_buffer_stride, 0, 0, 0);
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
      int intra_cost = aom_satd(coeff, coeff_count);
      if (intra_cost < best_intra_cost) {
        best_intra_cost = intra_cost;
        best_mode = mode;
      }
    }

    // Re-run prediction and the forward transform with the best mode.
    av1_predict_intra_block(
        xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
        block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
        dst_buffer, dst_buffer_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
    av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
    av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

    const struct macroblock_plane *const p = &x->plane[0];
    uint16_t eob;
    const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
    QUANT_PARAM quant_param;
    int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
    av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd)) {
      av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                    scan_order, &quant_param);
    } else {
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                             scan_order, &quant_param);
    }
#else
    av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
                           &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH
    av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                                dst_buffer_stride, eob, 0);
    WeberStats *weber_stats =
        &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                             (mi_col / mb_step)];

    weber_stats->rec_pix_max = 1;
    weber_stats->rec_variance = 0;
    weber_stats->src_pix_max = 1;
    weber_stats->src_variance = 0;
    weber_stats->distortion = 0;

    int64_t src_mean = 0;
    int64_t rec_mean = 0;
    int64_t dist_mean = 0;

    for (int pix_row = 0; pix_row < block_size; ++pix_row) {
      for (int pix_col = 0; pix_col < block_size; ++pix_col) {
        int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
        if (is_cur_buf_hbd(xd)) {
          uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
          uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
          src_pix = src[pix_row * buf_stride + pix_col];
          rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
        } else {
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
        }
#else
        src_pix = mb_buffer[pix_row * buf_stride + pix_col];
        rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
        src_mean += src_pix;
        rec_mean += rec_pix;
        dist_mean += src_pix - rec_pix;
        weber_stats->src_variance += src_pix * src_pix;
        weber_stats->rec_variance += rec_pix * rec_pix;
        weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
        weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
        weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
      }
    }

    if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
      *sum_rec_distortion += weber_stats->distortion;
      int est_block_rate = 0;
      int64_t est_block_dist = 0;
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                      pix_num, &est_block_rate,
                                      &est_block_dist);
      *sum_est_rate += est_block_rate;
    }

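    // At this point src_variance / rec_variance / distortion hold raw sums
    // (sum of x^2 and sum of squared error); subtracting (sum x)^2 / N below
    // converts them to N-times-the-variance form, i.e.
    //   sum(x^2) - (sum x)^2 / N = N * Var(x).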
    weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
    weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
    weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
    weber_stats->satd = best_intra_cost;

    qcoeff[0] = 0;
    int max_scale = 0;
    for (int idx = 1; idx < coeff_count; ++idx) {
      const int abs_qcoeff = abs(qcoeff[idx]);
      max_scale = AOMMAX(max_scale, abs_qcoeff);
    }
    weber_stats->max_scale = max_scale;

    if ((mi_col + mb_step) % mt_unit_step == 0 ||
        (mi_col + mb_step) >= mi_cols) {
      enc_row_mt->sync_write_ptr(row_mt_sync, mt_thread_id, mt_unit_col,
                                 mt_unit_cols);
      ++mt_unit_col;
    }
  }
  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
}

static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion,
                               double *sum_est_rate) {
  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const int mb_step = mi_size_wide[bsize];
  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
    av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff,
                               dqcoeff, sum_rec_distortion, sum_est_rate);
  }
}

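// Estimates the frame-level normalization factor as a SATD-weighted geometric
// mean of the per-block Wiener variances:
//   norm = exp(sum_i w_i * log(var_i) / sum_i w_i),  w_i = satd_i / sqrt(sse_i)
// so blocks with more energy-normalized detail contribute more weight.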
static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi,
                                        const BLOCK_SIZE norm_block_size) {
  const AV1_COMMON *const cm = &cpi->common;
  int64_t norm_factor = 1;
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
  const int norm_step = mi_size_wide[norm_block_size];
  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
      const int sb_wiener_var =
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
      const double scaled_satd = (double)satd / sqrt((double)sse);
      sb_wiener_log += scaled_satd * log(sb_wiener_var);
      sb_count += scaled_satd;
    }
  }
  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
  norm_factor = AOMMAX(1, norm_factor);

  return norm_factor;
}

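// Disables the smooth, Paeth, CfL, and diagonal intra modes when the frame
// is, by rough estimate, already coded at high quality: base qindex below
// 128, estimated bits per pixel above 2.0, and reconstruction distortion per
// pixel below 4.0. The thresholds are empirical.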
static void automatic_intra_tools_off(AV1_COMP *cpi,
                                      const double sum_rec_distortion,
                                      const double sum_est_rate) {
  if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return;

  // Thresholds
  const int high_quality_qindex = 128;
  const double high_quality_bpp = 2.0;
  const double high_quality_dist_per_pix = 4.0;

  AV1_COMMON *const cm = &cpi->common;
  const int qindex = cm->quant_params.base_qindex;
  const double dist_per_pix =
      (double)sum_rec_distortion / (cm->width * cm->height);
  // The estimated bpp is not accurate; divide by an empirical constant of 100
  // to bring it into range.
  const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100);

  if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp &&
      dist_per_pix < high_quality_dist_per_pix) {
    cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0;
    cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0;
  }
}

void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq_params = cm->seq_params;
  if (aom_realloc_frame_buffer(
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
          seq_params->subsampling_y, seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
          NULL, cpi->oxcf.tool_cfg.enable_global_motion, 0))
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");
  cpi->norm_wiener_variance = 0;

  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  // xd->mi needs to be set up since it is used in av1_frame_init_quantizer.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
  av1_frame_init_quantizer(cpi);

  double sum_rec_distortion = 0.0;
  double sum_est_rate = 0.0;

  MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int num_workers =
      AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
  enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
  // Calculate the differential contrast for each block of the entire image.
  // TODO(aomedia:3376): Remove " && 0" when there are no data races in
  // av1_calc_mb_wiener_var_mt(). See also bug aomedia:3380.
  if (num_workers > 1 && 0) {
    enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
    enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
    av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
                              &sum_est_rate);
  } else {
    calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
  }

  // Determine whether to turn off several intra coding tools.
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

  const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
  cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
  const int norm_step = mi_size_wide[norm_block_size];

  double sb_wiener_log = 0;
  double sb_count = 0;
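  // Two refinement passes: re-estimate the normalization factor with each
  // superblock's variance clamped through beta in [0.25, 4] (the same clamp
  // applied at delta-q time in av1_get_sbq_perceptual_ai), so the norm
  // converges toward the value actually used for q assignment.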
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
    sb_wiener_log = 0;
    sb_count = 0;
    for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
      for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
           mi_col += norm_step) {
        int sb_wiener_var =
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
        double min_max_scale = AOMMAX(
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));

        beta = AOMMIN(beta, 4);
        beta = AOMMAX(beta, 0.25);

        if (beta < 1 / min_max_scale) continue;

        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
        double scaled_satd = (double)satd / sqrt((double)sse);
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
        sb_count += scaled_satd;
      }
    }

    if (sb_count > 0)
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
  }

  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
  aom_free_frame_buffer(&cm->cur_frame->buf);
}

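// Maps a superblock's Wiener variance to a qindex. Roughly,
//   beta = norm_wiener_variance / sb_wiener_var
// acts as a quality scaling factor: a flatter-than-average block (beta > 1)
// receives a finer quantizer and a busier block (beta < 1) a coarser one.
// beta is floored at 1 / min_max_scale and clamped to [0.25, 4], and the
// resulting offset is further limited to within 20 delta_q_res steps of the
// base q.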
int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                              int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col);
  int offset = 0;
  double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
  double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col));
  beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);

  // Cap beta such that the delta q value is not too far away from the base q.
  beta = AOMMIN(beta, 4);
  beta = AOMMAX(beta, 0.25);
  offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1);
  offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1);
  int qindex = cm->quant_params.base_qindex + offset;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ);
  if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}

void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) {
  AV1_COMMON *cm = &cpi->common;

  if (cpi->mb_delta_q) return;

  CHECK_MEM_ERROR(cm, cpi->mb_delta_q,
                  aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols,
                             sizeof(*cpi->mb_delta_q)));
}

#if CONFIG_TFLITE
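// Runs the bundled TFLite delta-q model over each superblock of the luma
// plane. The flow follows the TFLite C API: build the model and interpreter,
// allocate tensors, copy a [0, 1]-normalized float patch into the input
// tensor, invoke, and read two floats back from the output tensor. Returns 1
// on any failure so the caller can report an error.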
static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows,
                         int bit_depth, uint8_t *y_buffer, int y_stride,
                         float *predicts0, float *predicts1) {
  // Create the model and interpreter options.
  TfLiteModel *model =
      TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize);
  if (model == NULL) return 1;

  TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
  if (options == NULL) {
    TfLiteModelDelete(model);
    return 1;
  }
  TfLiteInterpreterOptionsSetNumThreads(options, 2);

  // Create the interpreter.
  TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
  if (interpreter == NULL) {
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  // Allocate tensors and populate the input tensor data.
  TfLiteInterpreterAllocateTensors(interpreter);
  TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0);
  if (input_tensor == NULL) {
    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  size_t input_size = TfLiteTensorByteSize(input_tensor);
  float *input_data = aom_calloc(input_size, 1);
  if (input_data == NULL) {
    TfLiteInterpreterDelete(interpreter);
    TfLiteInterpreterOptionsDelete(options);
    TfLiteModelDelete(model);
    return 1;
  }

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int row_offset = (row * num_mi_h) << 2;
      const int col_offset = (col * num_mi_w) << 2;

      uint8_t *buf = y_buffer + row_offset * y_stride + col_offset;
      int r = row_offset, pos = 0;
      const float base = (float)((1 << bit_depth) - 1);
      while (r < row_offset + (num_mi_h << 2)) {
        for (int c = 0; c < (num_mi_w << 2); ++c) {
          input_data[pos++] = bit_depth > 8
                                  ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base
                                  : (float)*(buf + c) / base;
        }
        buf += y_stride;
        ++r;
      }
      TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size);

      // Execute inference.
      if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) {
        TfLiteInterpreterDelete(interpreter);
        TfLiteInterpreterOptionsDelete(options);
        TfLiteModelDelete(model);
        aom_free(input_data);
        return 1;
      }

      // Extract the output tensor data.
      const TfLiteTensor *output_tensor =
          TfLiteInterpreterGetOutputTensor(interpreter, 0);
      if (output_tensor == NULL) {
        TfLiteInterpreterDelete(interpreter);
        TfLiteInterpreterOptionsDelete(options);
        TfLiteModelDelete(model);
        aom_free(input_data);
        return 1;
      }

      size_t output_size = TfLiteTensorByteSize(output_tensor);
      float output_data[2];

      TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size);
      predicts0[row * num_cols + col] = output_data[0];
      predicts1[row * num_cols + col] = output_data[1];
    }
  }

  // Dispose of the model and interpreter objects.
  TfLiteInterpreterDelete(interpreter);
  TfLiteInterpreterOptionsDelete(options);
  TfLiteModelDelete(model);
  aom_free(input_data);
  return 0;
}

void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;
  const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // TODO(sdeng): fit a better model_1; disable it at this time.
  float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f;
  CHECK_MEM_ERROR(cm, mb_delta_q0,
                  aom_calloc(num_rows * num_cols, sizeof(float)));
  CHECK_MEM_ERROR(cm, mb_delta_q1,
                  aom_calloc(num_rows * num_cols, sizeof(float)));

  if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer,
                    y_stride, mb_delta_q0, mb_delta_q1)) {
    aom_internal_error(cm->error, AOM_CODEC_ERROR,
                       "Failed to call TFLite functions.");
  }

  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      delta_q_avg0 += mb_delta_q0[index];
    }
  }

  delta_q_avg0 /= (float)(num_rows * num_cols);

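  // Scale the model output by where cq_level sits relative to the average
  // prediction: below the average, scale the deltas down proportionally;
  // above it, fade them out linearly as cq_level approaches the maximum so
  // near-max q receives little perturbation.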
  float scaling_factor;
  const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ;
  if (cq_level < delta_q_avg0) {
    scaling_factor = cq_level / delta_q_avg0;
  } else {
    scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0);
  }

  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      cpi->mb_delta_q[index] =
          RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ *
               scaling_factor * (mb_delta_q0[index] - delta_q_avg0));
    }
  }

  aom_free(mb_delta_q0);
  aom_free(mb_delta_q1);
}
#else   // !CONFIG_TFLITE
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between the current version (Sept 2021) and
  // the baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user-labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block.
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // Geometric mean of the 8x8 per-pixel variances within the superblock.
      var = exp(var / num_of_var);
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

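  // Pick which fitted curve (or a blend of the two) to apply based on where
  // cq_level falls relative to the two model averages: below both, scale
  // model 0; between them, interpolate between the two models (model_idx 2);
  // above both, scale model 1 toward MAXQ.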
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
#endif  // CONFIG_TFLITE

int av1_get_sbq_user_rating_based(AV1_COMP *const cpi, int mi_row, int mi_col) {
  const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
  const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
  AV1_COMMON *const cm = &cpi->common;
  const int base_qindex = cm->quant_params.base_qindex;
  if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex;

  const int num_mi_w = mi_size_wide[bsize];
  const int num_mi_h = mi_size_high[bsize];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w);
  const int delta_q = cpi->mb_delta_q[index];

  int qindex = base_qindex + delta_q;
  qindex = AOMMIN(qindex, MAXQ);
  qindex = AOMMAX(qindex, MINQ + 1);

  return qindex;
}