1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13 #include <float.h>
14 #include <math.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 #include "config/av1_rtcd.h"
21
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/binary_codes_writer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/aom_timer.h"
26
27 #if CONFIG_MISMATCH_DEBUG
28 #include "aom_util/debug_util.h"
29 #endif // CONFIG_MISMATCH_DEBUG
30
31 #include "av1/common/cfl.h"
32 #include "av1/common/common.h"
33 #include "av1/common/entropy.h"
34 #include "av1/common/entropymode.h"
35 #include "av1/common/idct.h"
36 #include "av1/common/mv.h"
37 #include "av1/common/mvref_common.h"
38 #include "av1/common/pred_common.h"
39 #include "av1/common/quant_common.h"
40 #include "av1/common/reconintra.h"
41 #include "av1/common/reconinter.h"
42 #include "av1/common/seg_common.h"
43 #include "av1/common/tile_common.h"
44 #include "av1/common/warped_motion.h"
45
46 #include "av1/encoder/allintra_vis.h"
47 #include "av1/encoder/aq_complexity.h"
48 #include "av1/encoder/aq_cyclicrefresh.h"
49 #include "av1/encoder/aq_variance.h"
50 #include "av1/encoder/global_motion_facade.h"
51 #include "av1/encoder/encodeframe.h"
52 #include "av1/encoder/encodeframe_utils.h"
53 #include "av1/encoder/encodemb.h"
54 #include "av1/encoder/encodemv.h"
55 #include "av1/encoder/encodetxb.h"
56 #include "av1/encoder/ethread.h"
57 #include "av1/encoder/extend.h"
58 #include "av1/encoder/intra_mode_search_utils.h"
59 #include "av1/encoder/ml.h"
60 #include "av1/encoder/motion_search_facade.h"
61 #include "av1/encoder/partition_strategy.h"
62 #if !CONFIG_REALTIME_ONLY
63 #include "av1/encoder/partition_model_weights.h"
64 #endif
65 #include "av1/encoder/partition_search.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/segmentation.h"
70 #include "av1/encoder/tokenize.h"
71 #include "av1/encoder/tpl_model.h"
72 #include "av1/encoder/var_based_part.h"
73
74 #if CONFIG_TUNE_VMAF
75 #include "av1/encoder/tune_vmaf.h"
76 #endif
77
78 /*!\cond */
79 // This is used as a reference when computing the source variance for the
80 // purposes of activity masking.
81 // Eventually this should be replaced by custom no-reference routines,
82 // which will be faster.
// Flat mid-gray (128) reference block, read with stride 0 by the variance
// kernels in av1_get_sby_perpixel_variance() below.
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
94
// High-bitdepth flat reference for 8-bit content (mid-gray = 128), stored as
// uint16_t and passed to the variance kernels via CONVERT_TO_BYTEPTR() in
// av1_high_get_sby_perpixel_variance().
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
106
// Flat reference for 10-bit content: mid-gray scaled by 4 (128 * 4 = 512).
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};
125
// Flat reference for 12-bit content: mid-gray scaled by 16 (128 * 16 = 2048).
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
147 /*!\endcond */
148
/*!\brief Per-pixel source variance of a block against a flat 128 reference.
 *
 * Runs the block-size-specific variance kernel with AV1_VAR_OFFS as the
 * reference (stride 0, so one flat row serves every line) and normalizes the
 * result to a per-pixel value via a rounded right shift.
 */
unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int ignored_sse;
  const unsigned int block_var = cpi->ppi->fn_ptr[bs].vf(
      ref->buf, ref->stride, AV1_VAR_OFFS, 0, &ignored_sse);
  // Divide by the pixel count of the block, with rounding.
  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
157
/*!\brief High-bitdepth per-pixel source variance against a flat mid-gray
 * reference.
 *
 * Selects the flat reference matching the bit depth (values 128, 512 or
 * 2048) and evaluates the block variance kernel, normalizing the result to
 * a per-pixel value via a rounded right shift.
 */
unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  assert(bd == 8 || bd == 10 || bd == 12);
  // Pick the flat reference that matches the coding bit depth.
  const uint16_t *flat_ref = AV1_HIGH_VAR_OFFS_8;
  switch (bd) {
    case 10: flat_ref = AV1_HIGH_VAR_OFFS_10; break;
    case 12: flat_ref = AV1_HIGH_VAR_OFFS_12; break;
    default: break;  // bd == 8: keep AV1_HIGH_VAR_OFFS_8.
  }
  unsigned int sse;
  const unsigned int block_var = cpi->ppi->fn_ptr[bs].vf(
      ref->buf, ref->stride, CONVERT_TO_BYTEPTR(flat_ref), 0, &sse);
  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
172
/*!\brief Points the macroblock's per-plane source buffers at the given frame.
 *
 * Records \c src as the current frame and, for each coded plane, sets up the
 * source prediction plane at the (mi_row, mi_col) position with the plane's
 * subsampling applied.
 */
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  x->e_mbd.cur_buf = src;

  // Clamp to MAX_MB_PLANE to quiet static-analysis warnings.
  const int plane_count = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < plane_count; ++plane) {
    const int is_uv = plane > 0;  // Chroma planes share crop/stride entries.
    setup_pred_plane(&x->plane[plane].src, bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, NULL,
                     x->e_mbd.plane[plane].subsampling_x,
                     x->e_mbd.plane[plane].subsampling_y);
  }
}
189
190 #if !CONFIG_REALTIME_ONLY
191 /*!\brief Assigns different quantization parameters to each super
192 * block based on its TPL weight.
193 *
194 * \ingroup tpl_modelling
195 *
196 * \param[in] cpi Top level encoder instance structure
197 * \param[in,out] td Thread data structure
198 * \param[in,out] x Macro block level data for this block.
 * \param[in]     tile_info Tile information / identification
 * \param[in]     mi_row    Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col    Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes Number of image planes (e.g. Y,U,V)
203 *
204 * \return No return value but updates macroblock and thread data
205 * related to the q / q delta to be used.
206 */
static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                     MACROBLOCK *const x,
                                     const TileInfo *const tile_info,
                                     int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // Caller must only invoke this when delta-q signaling is enabled.
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  // Start from the frame base qindex; each deltaq mode below may replace it.
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      // Perceptual mode, variant 1: modulate q by the superblock's
      // wavelet-energy level.
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      // Perceptual mode, other variants: modulate q by the superblock's
      // log block variance.
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  }

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the chosen qindex to the signalable delta-q step relative to the
  // previously coded base qindex.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  // Set offsets first so xd->mi[0] refers to this superblock's mode info.
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    // Derive the loop-filter delta from the q delta (scaled by 1/4), rounded
    // to a multiple of delta_lf_res.
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    // NOTE(review): with only one plane, two fewer LF components are
    // updated — presumably the chroma ones; confirm against FRAME_LF_COUNT.
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}
282
// Uses TPL statistics to decide which reference frames are worth searching
// for the superblock at (mi_row, mi_col): ranks references by accumulated
// prediction-error reduction and records keep/discard flags in
// x->tpl_keep_ref_frame.
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  // Default: no flags set; callers treat unset entries as "not kept".
  av1_zero(x->tpl_keep_ref_frame);

  // Bail out (leaving all flags zero) when TPL stats are unavailable or when
  // another adaptive-quantization mode is active.
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    // Overlay frames: keep every reference frame.
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  // Iterate the TPL stat blocks covering this superblock. Column bounds are
  // expressed in superres-scaled mi units to match the TPL stats layout.
  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      // Accumulate per-reference gains across all stat blocks in the SB.
      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

  // Insertion-sort reference indices 1..INTER_REFS_PER_FRAME-1 by ascending
  // accumulated cost (most negative == largest gain first).
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  // INTRA and LAST are always searched.
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gains are smaller than the previous more
        // relevant frame over certain amount, discard this frame and all the
        // frames afterwards.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}
383
adjust_rdmult_tpl_model(AV1_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)384 static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
385 int mi_row, int mi_col) {
386 const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
387 const int orig_rdmult = cpi->rd.RDMULT;
388
389 assert(IMPLIES(cpi->ppi->gf_group.size > 0,
390 cpi->gf_frame_index < cpi->ppi->gf_group.size));
391 const int gf_group_index = cpi->gf_frame_index;
392 if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
393 cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
394 cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
395 const int dr =
396 av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
397 x->rdmult = dr;
398 }
399 }
400 #endif // !CONFIG_REALTIME_ONLY
401
402 #if CONFIG_RT_ML_PARTITIONING
403 // Get a prediction(stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    // Inter frame: build a zero-MV bilinear prediction of the whole 64x64
    // superblock from LAST_FRAME into x->est_pred.
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    // Temporarily point the luma dst at est_pred for the build, then predict.
    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
    // Key frame: fill est_pred with flat mid-gray for the active bit depth.
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        // NOTE(review): memset converts its value to unsigned char, so
        // 128 * 4 (= 512) fills bytes with 0, not 512 — presumably not the
        // intended mid-gray for 10-bit; confirm against est_pred's type.
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        // NOTE(review): same truncation concern as the 10-bit case above
        // (128 * 16 = 2048 truncates to 0 per byte).
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
450 #endif // CONFIG_RT_ML_PARTITIONING
451
452 #define AVG_CDF_WEIGHT_LEFT 3
453 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
454
455 /*!\brief Encode a superblock (minimal RD search involved)
456 *
457 * \ingroup partition_search
458 * Encodes the superblock by a pre-determined partition pattern, only minor
459 * rd-based searches are allowed to adjust the initial pattern. It is only used
460 * by realtime encoding.
461 */
encode_nonrd_sb(AV1_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,TokenExtra ** tp,const int mi_row,const int mi_col,const int seg_skip)462 static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
463 TileDataEnc *tile_data, TokenExtra **tp,
464 const int mi_row, const int mi_col,
465 const int seg_skip) {
466 AV1_COMMON *const cm = &cpi->common;
467 MACROBLOCK *const x = &td->mb;
468 const SPEED_FEATURES *const sf = &cpi->sf;
469 const TileInfo *const tile_info = &tile_data->tile_info;
470 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
471 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
472 const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
473
474 // Grade the temporal variation of the sb, the grade will be used to decide
475 // fast mode search strategy for coding blocks
476 if (sf->rt_sf.source_metrics_sb_nonrd &&
477 cpi->svc.number_spatial_layers <= 1 &&
478 cm->current_frame.frame_type != KEY_FRAME) {
479 int offset = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
480 av1_source_content_sb(cpi, x, offset);
481 }
482 #if CONFIG_RT_ML_PARTITIONING
483 if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
484 PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
485 RD_STATS dummy_rdc;
486 get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
487 av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
488 BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
489 av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
490 return;
491 }
492 #endif
493 // Set the partition
494 if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
495 // set a fixed-size partition
496 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
497 const BLOCK_SIZE bsize =
498 seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
499 av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
500 } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
501 // set a variance-based partition
502 av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
503 av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
504 }
505 assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
506 sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
507 set_cb_offsets(td->mb.cb_offset, 0, 0);
508
509 // Adjust and encode the superblock
510 PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
511
512 // Initialize the flag to skip cdef to 1.
513 if (sf->rt_sf.skip_cdef_sb) {
514 // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
515 // "blocks".
516 const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
517 for (int r = 0; r < block64_in_sb; ++r) {
518 for (int c = 0; c < block64_in_sb; ++c) {
519 const int idx_in_sb =
520 r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
521 if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = 1;
522 }
523 }
524 }
525
526 av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
527 pc_root);
528
529 if (sf->rt_sf.skip_cdef_sb) {
530 // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
531 // "blocks".
532 const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
533 const int skip = mi[0]->skip_cdef_curr_sb;
534 for (int r = 0; r < block64_in_sb; ++r) {
535 for (int c = 0; c < block64_in_sb; ++c) {
536 const int idx_in_sb =
537 r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
538 if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = skip;
539 }
540 }
541 }
542 av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
543 }
544
545 // This function initializes the stats for encode_rd_sb.
static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  // Simple-motion-search MVs are only needed when at least one of the
  // SMS-based partition speed features is active, and never on intra frames.
  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
      cpi->oxcf.gf_cfg.lag_in_frames == 0) {
    // 1-pass real-time with zero lag: no TPL / delta-q setup to do.
    (void)tile_info;
    (void)mi_row;
    (void)mi_col;
    (void)gather_tpl_data;
  } else {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }
      if (cpi->oxcf.algo_cfg.enable_tpl_model) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset hash state for transform/mode rd hash information
  reset_hash_records(&x->txfm_search_info, cpi->sf.tx_sf.use_inter_txb_hash);
  av1_zero(x->picked_ref_frames_mask);
  // Start the SB with an invalid RD cost; the partition search fills it in.
  av1_invalid_rd_stats(rd_cost);
}
602
603 /*!\brief Encode a superblock (RD-search-based)
604 *
605 * \ingroup partition_search
606 * Conducts partition search for a superblock, based on rate-distortion costs,
607 * from scratch or adjusting from a pre-calculated partition pattern.
608 */
static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                    TileDataEnc *tile_data, TokenExtra **tp,
                                    const int mi_row, const int mi_col,
                                    const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    // partition search starting from a variance-based partition
    av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col,
                                       sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, pc_root);
    av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, pc_root);
    av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        // External partition model drives the search order.
        av1_reset_part_sf(&cpi->sf.part_sf);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL,
                              SB_SINGLE_PASS, NULL);
      }
#else
      PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      SB_FIRST_PASS_STATS sb_fp_stats;
      av1_backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
      PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node(sb_size);
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, pc_root_p0, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass: restore the pre-first-pass state, clear first-pass
      // side effects, then search again with the final (wet) pass.
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);

      PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node(sb_size);
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, pc_root_p1, sms_root, NULL,
                            SB_WET_PASS, NULL);
    }
    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}
740
is_rtc_mode(const CostUpdateFreq * cost_upd_freq,MODE mode)741 static AOM_INLINE int is_rtc_mode(const CostUpdateFreq *cost_upd_freq,
742 MODE mode) {
743 return ((mode == REALTIME) && cost_upd_freq->coeff >= 2 &&
744 cost_upd_freq->mode >= 2 && cost_upd_freq->mv >= 2 &&
745 cost_upd_freq->dv >= 2);
746 }
747
/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 *
 * \param[in] cpi       Top-level encoder structure
 * \param[in] td        Thread data for the worker encoding this row
 * \param[in] tile_data Per-tile encode data (includes row-mt sync state)
 * \param[in] mi_row    Row position of the SB row in mi (4x4) units
 * \param[in] tp        In/out token buffer write position
 */
static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                     TileDataEnc *tile_data, int mi_row,
                                     TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  // SB-row index of mi_row within this tile.
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  const CostUpdateFreq *const cost_upd_freq = &cpi->oxcf.cost_upd_freq;
  const int rtc_mode = is_rtc_mode(cost_upd_freq, cpi->oxcf.mode);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime mode and when frequency of cost updates is off/tile, wait for
    // the top superblock to finish encoding. Otherwise, wait for the top-right
    // superblock to finish encoding.  (rtc_mode is 0 or 1, so the dependency
    // column is either sb_col_in_tile or sb_col_in_tile - 1.)
    (*(enc_row_mt->sync_read_ptr))(row_mt_sync, sb_row,
                                   sb_col_in_tile - rtc_mode);
    // CDF context propagation between rows only applies in row-mt coding.
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // update context: average the current tile context with the saved
        // top-right (or top, at the last column) row context.
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    x->color_sensitivity_sb[0] = 0;
    x->color_sensitivity_sb[1] = 0;
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->content_state_sb.source_sad = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    // UINT_MAX marks the source variance as not-yet-computed for this SB.
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const int segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    // Produce the gradient data at superblock level, when intra mode pruning
    // based on hog is enabled.
    if (cpi->sf.intra_sf.intra_pruning_with_hog ||
        cpi->sf.intra_sf.chroma_intra_pruning_with_hog)
      produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
      else if (sb_col_in_tile >= 1)
        memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
               sizeof(*xd->tile_ctx));
    }
    // Signal that this SB is done so dependent rows can proceed.
    (*(enc_row_mt->sync_write_ptr))(row_mt_sync, sb_row, sb_col_in_tile,
                                    sb_cols_in_tile);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
876
init_encode_frame_mb_context(AV1_COMP * cpi)877 static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
878 AV1_COMMON *const cm = &cpi->common;
879 const int num_planes = av1_num_planes(cm);
880 MACROBLOCK *const x = &cpi->td.mb;
881 MACROBLOCKD *const xd = &x->e_mbd;
882
883 // Copy data over into macro block data structures.
884 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
885 cm->seq_params->sb_size);
886
887 av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
888 cm->seq_params->subsampling_y, num_planes);
889 }
890
// (Re)allocate cpi->tile_data for the current tile configuration and record
// the number of allocated tiles in cpi->allocated_tiles.  Any previous
// allocation is released first.  On allocation failure, CHECK_MEM_ERROR
// aborts encoding via the error handler in cm.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  // aom_free() is a no-op on NULL, so no guard is needed here.
  aom_free(cpi->tile_data);
  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = tile_cols * tile_rows;
}
903
// Initialize per-tile encode state for every tile in the frame: tile geometry,
// first-pass MV, token buffer / token-list partitioning, CDF-update permission
// and the starting frame context.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  TokenInfo *const token_info = &cpi->token_info;
  TokenExtra *cur_tok = token_info->tile_tok[0][0];
  TokenList *cur_tplist = token_info->tplist[0][0];
  // Number of token slots / SB rows consumed by the previous tile; applied as
  // an advance before assigning the next tile's pointers.
  unsigned int tok_advance = 0;
  int tplist_advance = 0;
  const CostUpdateFreq *const cost_upd_freq = &cpi->oxcf.cost_upd_freq;
  const int rtc_mode = is_rtc_mode(cost_upd_freq, cpi->oxcf.mode);

  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (cur_tok != NULL && cur_tplist != NULL) {
        // Advance past the previous tile's region, then record this tile's
        // start position and compute the size it will occupy.
        cur_tok += tok_advance;
        token_info->tile_tok[tile_row][tile_col] = cur_tok;
        tok_advance = allocated_tokens(
            *tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        cur_tplist += tplist_advance;
        token_info->tplist[tile_row][tile_col] = cur_tplist;
        tplist_advance = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
      }
      // CDF updates are disabled for large-scale tiles, when the frame
      // disables CDF updates, or in the relaxed RTC configuration.
      tile_data->allow_update_cdf = !cm->tiles.large_scale &&
                                    !cm->features.disable_cdf_update &&
                                    !rtc_mode;
      tile_data->tctx = *cm->fc;
    }
  }
}
945
946 /*!\brief Encode a superblock row
947 *
948 * \ingroup partition_search
949 */
av1_encode_sb_row(AV1_COMP * cpi,ThreadData * td,int tile_row,int tile_col,int mi_row)950 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
951 int tile_col, int mi_row) {
952 AV1_COMMON *const cm = &cpi->common;
953 const int num_planes = av1_num_planes(cm);
954 const int tile_cols = cm->tiles.cols;
955 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
956 const TileInfo *const tile_info = &this_tile->tile_info;
957 TokenExtra *tok = NULL;
958 TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
959 const int sb_row_in_tile =
960 (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
961 const int tile_mb_cols =
962 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
963 const int num_mb_rows_in_sb =
964 ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
965
966 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
967 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
968 assert(tplist != NULL);
969 tplist[sb_row_in_tile].start = tok;
970
971 encode_sb_row(cpi, td, this_tile, mi_row, &tok);
972
973 tplist[sb_row_in_tile].count =
974 (unsigned int)(tok - tplist[sb_row_in_tile].start);
975
976 assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
977 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
978 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
979 num_planes));
980
981 (void)tile_mb_cols;
982 (void)num_mb_rows_in_sb;
983 }
984
985 /*!\brief Encode a tile
986 *
987 * \ingroup partition_search
988 */
av1_encode_tile(AV1_COMP * cpi,ThreadData * td,int tile_row,int tile_col)989 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
990 int tile_col) {
991 AV1_COMMON *const cm = &cpi->common;
992 TileDataEnc *const this_tile =
993 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
994 const TileInfo *const tile_info = &this_tile->tile_info;
995
996 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
997
998 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
999 tile_info->mi_col_end, tile_row);
1000 av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1001 &td->mb.e_mbd);
1002
1003 if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1004 cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1005
1006 if (td->mb.txfm_search_info.txb_rd_records != NULL) {
1007 av1_crc32c_calculator_init(
1008 &td->mb.txfm_search_info.txb_rd_records->mb_rd_record.crc_calculator);
1009 }
1010
1011 for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1012 mi_row += cm->seq_params->mib_size) {
1013 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1014 }
1015 this_tile->abs_sum_level = td->abs_sum_level;
1016 }
1017
1018 /*!\brief Break one frame into tiles and encode the tiles
1019 *
1020 * \ingroup partition_search
1021 *
1022 * \param[in] cpi Top-level encoder structure
1023 */
encode_tiles(AV1_COMP * cpi)1024 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
1025 AV1_COMMON *const cm = &cpi->common;
1026 const int tile_cols = cm->tiles.cols;
1027 const int tile_rows = cm->tiles.rows;
1028 int tile_col, tile_row;
1029
1030 MACROBLOCK *const mb = &cpi->td.mb;
1031 assert(IMPLIES(cpi->tile_data == NULL,
1032 cpi->allocated_tiles < tile_cols * tile_rows));
1033 if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1034
1035 av1_init_tile_data(cpi);
1036 av1_alloc_mb_data(cm, mb, cpi->sf.rt_sf.use_nonrd_pick_mode);
1037
1038 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1039 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1040 TileDataEnc *const this_tile =
1041 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1042 cpi->td.intrabc_used = 0;
1043 cpi->td.deltaq_used = 0;
1044 cpi->td.abs_sum_level = 0;
1045 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1046 cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1047 // Reset cyclic refresh counters.
1048 av1_init_cyclic_refresh_counters(&cpi->td.mb);
1049
1050 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1051 // Accumulate cyclic refresh params.
1052 if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
1053 !frame_is_intra_only(&cpi->common))
1054 av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
1055 &cpi->td.mb);
1056 cpi->intrabc_used |= cpi->td.intrabc_used;
1057 cpi->deltaq_used |= cpi->td.deltaq_used;
1058 }
1059 }
1060
1061 av1_dealloc_mb_data(cm, mb);
1062 }
1063
1064 // Set the relative distance of a reference frame w.r.t. current frame
set_rel_frame_dist(const AV1_COMMON * const cm,RefFrameDistanceInfo * const ref_frame_dist_info,const int ref_frame_flags)1065 static AOM_INLINE void set_rel_frame_dist(
1066 const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1067 const int ref_frame_flags) {
1068 MV_REFERENCE_FRAME ref_frame;
1069 int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1070 ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1071 ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1072 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1073 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1074 if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1075 int dist = av1_encoder_get_relative_dist(
1076 cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1077 cm->current_frame.display_order_hint);
1078 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1079 // Get the nearest ref_frame in the past
1080 if (abs(dist) < min_past_dist && dist < 0) {
1081 ref_frame_dist_info->nearest_past_ref = ref_frame;
1082 min_past_dist = abs(dist);
1083 }
1084 // Get the nearest ref_frame in the future
1085 if (dist < min_future_dist && dist > 0) {
1086 ref_frame_dist_info->nearest_future_ref = ref_frame;
1087 min_future_dist = dist;
1088 }
1089 }
1090 }
1091 }
1092
refs_are_one_sided(const AV1_COMMON * cm)1093 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
1094 assert(!frame_is_intra_only(cm));
1095
1096 int one_sided_refs = 1;
1097 const int cur_display_order_hint = cm->current_frame.display_order_hint;
1098 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1099 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1100 if (buf == NULL) continue;
1101 if (av1_encoder_get_relative_dist(buf->display_order_hint,
1102 cur_display_order_hint) > 0) {
1103 one_sided_refs = 0; // bwd reference
1104 break;
1105 }
1106 }
1107 return one_sided_refs;
1108 }
1109
get_skip_mode_ref_offsets(const AV1_COMMON * cm,int ref_order_hint[2])1110 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1111 int ref_order_hint[2]) {
1112 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1113 ref_order_hint[0] = ref_order_hint[1] = 0;
1114 if (!skip_mode_info->skip_mode_allowed) return;
1115
1116 const RefCntBuffer *const buf_0 =
1117 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1118 const RefCntBuffer *const buf_1 =
1119 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1120 assert(buf_0 != NULL && buf_1 != NULL);
1121
1122 ref_order_hint[0] = buf_0->order_hint;
1123 ref_order_hint[1] = buf_1->order_hint;
1124 }
1125
// Decide whether skip mode should be enabled for the current frame.
// Returns 1 only when skip mode is allowed by the bitstream rules AND the
// encoder-side heuristics below do not veto it.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame differ by more than 1 frame.
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  // NOTE: only the second distance is taken as absolute value, matching the
  // past/future orientation of the skip-mode reference pair.
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  static const int flag_list[REF_FRAMES] = {
    0,             AOM_LAST_FLAG, AOM_LAST2_FLAG, AOM_LAST3_FLAG,
    AOM_GOLD_FLAG, AOM_BWD_FLAG,  AOM_ALT2_FLAG,  AOM_ALT_FLAG
  };
  const int rf0 = cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME;
  const int rf1 = cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME;
  // Both references of the skip-mode pair must be available this frame.
  if (!(cpi->ref_frame_flags & flag_list[rf0]) ||
      !(cpi->ref_frame_flags & flag_list[rf1])) {
    return 0;
  }

  return 1;
}
1164
set_default_interp_skip_flags(const AV1_COMMON * cm,InterpSearchFlags * interp_search_flags)1165 static AOM_INLINE void set_default_interp_skip_flags(
1166 const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1167 const int num_planes = av1_num_planes(cm);
1168 interp_search_flags->default_interp_skip_flags =
1169 (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1170 : INTERP_SKIP_LUMA_SKIP_CHROMA;
1171 }
1172
// Compute cpi->prune_ref_frame_mask: a bitmask (indexed by extended reference
// index) of compound reference pairs that should be skipped for the whole
// frame.  Two pruning strategies apply:
//   1. If one-sided compound is disabled and all refs are one-sided, prune
//      every compound pair.
//   2. Otherwise (RD mode with selective_ref_frame >= 2), prune same-direction
//      pairs and, at level >= 4, ALTREF2 pairs dominated by BWDREF.
// NOTE(review): the mask is assumed to have been cleared by the caller before
// this is invoked — bits are only OR-ed in here.
static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    // Signed display-order distance of ALTREF2 / BWDREF from the current
    // frame (positive => future reference).
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    // Iterate over the compound (two-reference) combinations only.
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      // Skip pairs where either reference is unavailable this frame.
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}
1229
1230 /*!\brief Encoder setup(only for the current frame), encoding, and recontruction
1231 * for a single frame
1232 *
1233 * \ingroup high_level_algo
1234 */
encode_frame_internal(AV1_COMP * cpi)1235 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
1236 ThreadData *const td = &cpi->td;
1237 MACROBLOCK *const x = &td->mb;
1238 AV1_COMMON *const cm = &cpi->common;
1239 CommonModeInfoParams *const mi_params = &cm->mi_params;
1240 FeatureFlags *const features = &cm->features;
1241 MACROBLOCKD *const xd = &x->e_mbd;
1242 RD_COUNTS *const rdc = &cpi->td.rd_counts;
1243 FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1244 IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1245 MultiThreadInfo *const mt_info = &cpi->mt_info;
1246 AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1247 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1248 const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1249 int i;
1250
1251 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1252 mi_params->setup_mi(mi_params);
1253 }
1254
1255 set_mi_offsets(mi_params, xd, 0, 0);
1256
1257 av1_zero(*td->counts);
1258 av1_zero(rdc->comp_pred_diff);
1259 av1_zero(rdc->tx_type_used);
1260 av1_zero(rdc->obmc_used);
1261 av1_zero(rdc->warped_used);
1262
1263 // Reset the flag.
1264 cpi->intrabc_used = 0;
1265 // Need to disable intrabc when superres is selected
1266 if (av1_superres_scaled(cm)) {
1267 features->allow_intrabc = 0;
1268 }
1269
1270 features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1271
1272 if (features->allow_warped_motion &&
1273 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1274 const FRAME_UPDATE_TYPE update_type =
1275 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1276 if (frame_probs->warped_probs[update_type] <
1277 cpi->sf.inter_sf.prune_warped_prob_thresh)
1278 features->allow_warped_motion = 0;
1279 }
1280
1281 int hash_table_created = 0;
1282 if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1283 !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1284 // TODO(any): move this outside of the recoding loop to avoid recalculating
1285 // the hash table.
1286 // add to hash table
1287 const int pic_width = cpi->source->y_crop_width;
1288 const int pic_height = cpi->source->y_crop_height;
1289 uint32_t *block_hash_values[2][2];
1290 int8_t *is_block_same[2][3];
1291 int k, j;
1292
1293 for (k = 0; k < 2; k++) {
1294 for (j = 0; j < 2; j++) {
1295 CHECK_MEM_ERROR(cm, block_hash_values[k][j],
1296 aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
1297 }
1298
1299 for (j = 0; j < 3; j++) {
1300 CHECK_MEM_ERROR(cm, is_block_same[k][j],
1301 aom_malloc(sizeof(int8_t) * pic_width * pic_height));
1302 }
1303 }
1304
1305 av1_hash_table_init(intrabc_hash_info);
1306 av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table);
1307 hash_table_created = 1;
1308 av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1309 block_hash_values[0], is_block_same[0]);
1310 // Hash data generated for screen contents is used for intraBC ME
1311 const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1312 const int max_sb_size =
1313 (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1314 int src_idx = 0;
1315 for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1316 const int dst_idx = !src_idx;
1317 av1_generate_block_hash_value(
1318 intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1319 block_hash_values[dst_idx], is_block_same[src_idx],
1320 is_block_same[dst_idx]);
1321 if (size >= min_alloc_size) {
1322 av1_add_to_hash_map_by_row_with_precal_data(
1323 &intrabc_hash_info->intrabc_hash_table, block_hash_values[dst_idx],
1324 is_block_same[dst_idx][2], pic_width, pic_height, size);
1325 }
1326 }
1327
1328 for (k = 0; k < 2; k++) {
1329 for (j = 0; j < 2; j++) {
1330 aom_free(block_hash_values[k][j]);
1331 }
1332
1333 for (j = 0; j < 3; j++) {
1334 aom_free(is_block_same[k][j]);
1335 }
1336 }
1337 }
1338
1339 const CommonQuantParams *quant_params = &cm->quant_params;
1340 for (i = 0; i < MAX_SEGMENTS; ++i) {
1341 const int qindex =
1342 cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1343 : quant_params->base_qindex;
1344 xd->lossless[i] =
1345 qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1346 quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1347 quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1348 if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1349 xd->qindex[i] = qindex;
1350 if (xd->lossless[i]) {
1351 cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1352 } else {
1353 cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1354 }
1355 }
1356 features->coded_lossless = is_coded_lossless(cm, xd);
1357 features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1358
1359 // Fix delta q resolution for the moment
1360 cm->delta_q_info.delta_q_res = 0;
1361 if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1362 if (deltaq_mode == DELTA_Q_OBJECTIVE)
1363 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1364 else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1365 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1366 else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1367 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1368 else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1369 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1370
1371 // Set delta_q_present_flag before it is used for the first time
1372 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1373 cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1374
1375 // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1376 // is used for ineligible frames. That effectively will turn off row_mt
1377 // usage. Note objective delta_q and tpl eligible frames are only altref
1378 // frames currently.
1379 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1380 if (cm->delta_q_info.delta_q_present_flag) {
1381 if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1382 !is_frame_tpl_eligible(gf_group, cpi->gf_frame_index))
1383 cm->delta_q_info.delta_q_present_flag = 0;
1384 }
1385
1386 // Reset delta_q_used flag
1387 cpi->deltaq_used = 0;
1388
1389 cm->delta_q_info.delta_lf_present_flag =
1390 cm->delta_q_info.delta_q_present_flag &&
1391 oxcf->tool_cfg.enable_deltalf_mode;
1392 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
1393
1394 // update delta_q_present_flag and delta_lf_present_flag based on
1395 // base_qindex
1396 cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
1397 cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
1398 } else {
1399 cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
1400 cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
1401 cpi->cyclic_refresh->cnt_zeromv = 0;
1402 }
1403
1404 av1_frame_init_quantizer(cpi);
1405
1406 init_encode_frame_mb_context(cpi);
1407 set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
1408 if (cm->prev_frame && cm->prev_frame->seg.enabled)
1409 cm->last_frame_seg_map = cm->prev_frame->seg_map;
1410 else
1411 cm->last_frame_seg_map = NULL;
1412 if (features->allow_intrabc || features->coded_lossless) {
1413 av1_set_default_ref_deltas(cm->lf.ref_deltas);
1414 av1_set_default_mode_deltas(cm->lf.mode_deltas);
1415 } else if (cm->prev_frame) {
1416 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
1417 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
1418 }
1419 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
1420 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
1421
1422 cpi->all_one_sided_refs =
1423 frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
1424
1425 cpi->prune_ref_frame_mask = 0;
1426 // Figure out which ref frames can be skipped at frame level.
1427 setup_prune_ref_frame_mask(cpi);
1428
1429 x->txfm_search_info.txb_split_count = 0;
1430 #if CONFIG_SPEED_STATS
1431 x->txfm_search_info.tx_search_count = 0;
1432 #endif // CONFIG_SPEED_STATS
1433
1434 #if !CONFIG_REALTIME_ONLY
1435 #if CONFIG_COLLECT_COMPONENT_TIMING
1436 start_timing(cpi, av1_compute_global_motion_time);
1437 #endif
1438 av1_compute_global_motion_facade(cpi);
1439 #if CONFIG_COLLECT_COMPONENT_TIMING
1440 end_timing(cpi, av1_compute_global_motion_time);
1441 #endif
1442 #endif // !CONFIG_REALTIME_ONLY
1443
1444 #if CONFIG_COLLECT_COMPONENT_TIMING
1445 start_timing(cpi, av1_setup_motion_field_time);
1446 #endif
1447 av1_calculate_ref_frame_side(cm);
1448 if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
1449 #if CONFIG_COLLECT_COMPONENT_TIMING
1450 end_timing(cpi, av1_setup_motion_field_time);
1451 #endif
1452
1453 cm->current_frame.skip_mode_info.skip_mode_flag =
1454 check_skip_mode_enabled(cpi);
1455
1456 // Initialization of skip mode cost depends on the value of
1457 // 'skip_mode_flag'. This initialization happens in the function
1458 // av1_fill_mode_rates(), which is in turn called in
1459 // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
1460 // has to be called after 'skip_mode_flag' is initialized.
1461 av1_initialize_rd_consts(cpi);
1462 av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
1463
1464 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
1465 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
1466 mt_info->row_mt_enabled = 0;
1467
1468 if (oxcf->row_mt && (mt_info->num_workers > 1)) {
1469 mt_info->row_mt_enabled = 1;
1470 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
1471 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
1472 av1_encode_tiles_row_mt(cpi);
1473 } else {
1474 if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1)
1475 av1_encode_tiles_mt(cpi);
1476 else
1477 encode_tiles(cpi);
1478 }
1479
1480 // If intrabc is allowed but never selected, reset the allow_intrabc flag.
1481 if (features->allow_intrabc && !cpi->intrabc_used) {
1482 features->allow_intrabc = 0;
1483 }
1484 if (features->allow_intrabc) {
1485 cm->delta_q_info.delta_lf_present_flag = 0;
1486 }
1487
1488 if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
1489 cm->delta_q_info.delta_q_present_flag = 0;
1490 }
1491
1492 // Set the transform size appropriately before bitstream creation
1493 const MODE_EVAL_TYPE eval_type =
1494 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
1495 ? WINNER_MODE_EVAL
1496 : DEFAULT_EVAL;
1497 const TX_SIZE_SEARCH_METHOD tx_search_type =
1498 cpi->winner_mode_params.tx_size_search_methods[eval_type];
1499 assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
1500 features->tx_mode = select_tx_mode(cm, tx_search_type);
1501
1502 #if CONFIG_FRAME_PARALLEL_ENCODE
1503 // Retain the frame level probability update conditions for parallel frames.
1504 // These conditions will be consumed during postencode stage to update the
1505 // probability.
1506 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1507 cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
1508 cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
1509 cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
1510 (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1511 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
1512 cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
1513 (features->allow_warped_motion &&
1514 cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
1515 cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
1516 (cm->current_frame.frame_type != KEY_FRAME &&
1517 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
1518 features->interp_filter == SWITCHABLE);
1519 }
1520 #endif
1521
1522 if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
1523 ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
1524 INT_MAX) &&
1525 (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
1526 const FRAME_UPDATE_TYPE update_type =
1527 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1528 for (i = 0; i < TX_SIZES_ALL; i++) {
1529 int sum = 0;
1530 int j;
1531 int left = MAX_TX_TYPE_PROB;
1532
1533 for (j = 0; j < TX_TYPES; j++)
1534 sum += cpi->td.rd_counts.tx_type_used[i][j];
1535
1536 for (j = TX_TYPES - 1; j >= 0; j--) {
1537 int update_txtype_frameprobs = 1;
1538 const int new_prob =
1539 sum ? MAX_TX_TYPE_PROB * cpi->td.rd_counts.tx_type_used[i][j] / sum
1540 : (j ? 0 : MAX_TX_TYPE_PROB);
1541 #if CONFIG_FRAME_PARALLEL_ENCODE
1542 // Track the frame probabilities of parallel encode frames to update
1543 // during postencode stage.
1544 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1545 update_txtype_frameprobs = 0;
1546 cpi->frame_new_probs[cpi->num_frame_recode]
1547 .tx_type_probs[update_type][i][j] = new_prob;
1548 }
1549 #endif // CONFIG_FRAME_PARALLEL_ENCODE
1550 if (update_txtype_frameprobs) {
1551 int prob =
1552 (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
1553 left -= prob;
1554 if (j == 0) prob += left;
1555 frame_probs->tx_type_probs[update_type][i][j] = prob;
1556 }
1557 }
1558 }
1559 }
1560
1561 if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1562 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
1563 const FRAME_UPDATE_TYPE update_type =
1564 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1565
1566 for (i = 0; i < BLOCK_SIZES_ALL; i++) {
1567 int sum = 0;
1568 int update_obmc_frameprobs = 1;
1569 for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
1570
1571 const int new_prob =
1572 sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
1573 #if CONFIG_FRAME_PARALLEL_ENCODE
1574 // Track the frame probabilities of parallel encode frames to update
1575 // during postencode stage.
1576 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1577 update_obmc_frameprobs = 0;
1578 cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
1579 new_prob;
1580 }
1581 #endif // CONFIG_FRAME_PARALLEL_ENCODE
1582 if (update_obmc_frameprobs) {
1583 frame_probs->obmc_probs[update_type][i] =
1584 (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
1585 }
1586 }
1587 }
1588
1589 if (features->allow_warped_motion &&
1590 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1591 const FRAME_UPDATE_TYPE update_type =
1592 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1593 int update_warp_frameprobs = 1;
1594 int sum = 0;
1595 for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
1596 const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
1597 #if CONFIG_FRAME_PARALLEL_ENCODE
1598 // Track the frame probabilities of parallel encode frames to update
1599 // during postencode stage.
1600 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1601 update_warp_frameprobs = 0;
1602 cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
1603 new_prob;
1604 }
1605 #endif // CONFIG_FRAME_PARALLEL_ENCODE
1606 if (update_warp_frameprobs) {
1607 frame_probs->warped_probs[update_type] =
1608 (frame_probs->warped_probs[update_type] + new_prob) >> 1;
1609 }
1610 }
1611
1612 if (cm->current_frame.frame_type != KEY_FRAME &&
1613 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
1614 features->interp_filter == SWITCHABLE) {
1615 const FRAME_UPDATE_TYPE update_type =
1616 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1617
1618 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
1619 int sum = 0;
1620 int j;
1621 int left = 1536;
1622
1623 for (j = 0; j < SWITCHABLE_FILTERS; j++) {
1624 sum += cpi->td.counts->switchable_interp[i][j];
1625 }
1626
1627 for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
1628 int update_interpfilter_frameprobs = 1;
1629 const int new_prob =
1630 sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
1631 : (j ? 0 : 1536);
1632 #if CONFIG_FRAME_PARALLEL_ENCODE
1633 // Track the frame probabilities of parallel encode frames to update
1634 // during postencode stage.
1635 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1636 update_interpfilter_frameprobs = 0;
1637 cpi->frame_new_probs[cpi->num_frame_recode]
1638 .switchable_interp_probs[update_type][i][j] = new_prob;
1639 }
1640 #endif // CONFIG_FRAME_PARALLEL_ENCODE
1641 if (update_interpfilter_frameprobs) {
1642 int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
1643 new_prob) >>
1644 1;
1645 left -= prob;
1646 if (j == 0) prob += left;
1647 frame_probs->switchable_interp_probs[update_type][i][j] = prob;
1648 }
1649 }
1650 }
1651 }
1652 if (hash_table_created) {
1653 av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
1654 }
1655 }
1656
1657 /*!\brief Setup reference frame buffers and encode a frame
1658 *
1659 * \ingroup high_level_algo
1660 * \callgraph
1661 * \callergraph
1662 *
1663 * \param[in] cpi Top-level encoder structure
1664 */
av1_encode_frame(AV1_COMP * cpi)1665 void av1_encode_frame(AV1_COMP *cpi) {
1666 AV1_COMMON *const cm = &cpi->common;
1667 CurrentFrame *const current_frame = &cm->current_frame;
1668 FeatureFlags *const features = &cm->features;
1669 const int num_planes = av1_num_planes(cm);
1670 // Indicates whether or not to use a default reduced set for ext-tx
1671 // rather than the potential full set of 16 transforms
1672 features->reduced_tx_set_used = cpi->oxcf.txfm_cfg.reduced_tx_type_set;
1673
1674 // Make sure segment_id is no larger than last_active_segid.
1675 if (cm->seg.enabled && cm->seg.update_map) {
1676 const int mi_rows = cm->mi_params.mi_rows;
1677 const int mi_cols = cm->mi_params.mi_cols;
1678 const int last_active_segid = cm->seg.last_active_segid;
1679 uint8_t *map = cpi->enc_seg.map;
1680 for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
1681 for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
1682 map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
1683 }
1684 map += mi_cols;
1685 }
1686 }
1687
1688 av1_setup_frame_buf_refs(cm);
1689 enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
1690 cm->cur_frame->ref_display_order_hint,
1691 cm->current_frame.display_order_hint);
1692 set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
1693 cpi->ref_frame_flags);
1694 av1_setup_frame_sign_bias(cm);
1695
1696 #if CONFIG_MISMATCH_DEBUG
1697 mismatch_reset_frame(num_planes);
1698 #else
1699 (void)num_planes;
1700 #endif
1701
1702 if (cpi->sf.hl_sf.frame_parameter_update ||
1703 cpi->sf.rt_sf.use_comp_ref_nonrd) {
1704 RD_COUNTS *const rdc = &cpi->td.rd_counts;
1705
1706 if (frame_is_intra_only(cm))
1707 current_frame->reference_mode = SINGLE_REFERENCE;
1708 else
1709 current_frame->reference_mode = REFERENCE_MODE_SELECT;
1710
1711 features->interp_filter = SWITCHABLE;
1712 if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
1713
1714 features->switchable_motion_mode = 1;
1715
1716 rdc->compound_ref_used_flag = 0;
1717 rdc->skip_mode_used_flag = 0;
1718
1719 encode_frame_internal(cpi);
1720
1721 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1722 // Use a flag that includes 4x4 blocks
1723 if (rdc->compound_ref_used_flag == 0) {
1724 current_frame->reference_mode = SINGLE_REFERENCE;
1725 #if CONFIG_ENTROPY_STATS
1726 av1_zero(cpi->td.counts->comp_inter);
1727 #endif // CONFIG_ENTROPY_STATS
1728 }
1729 }
1730 // Re-check on the skip mode status as reference mode may have been
1731 // changed.
1732 SkipModeInfo *const skip_mode_info = ¤t_frame->skip_mode_info;
1733 if (frame_is_intra_only(cm) ||
1734 current_frame->reference_mode == SINGLE_REFERENCE) {
1735 skip_mode_info->skip_mode_allowed = 0;
1736 skip_mode_info->skip_mode_flag = 0;
1737 }
1738 if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
1739 skip_mode_info->skip_mode_flag = 0;
1740
1741 if (!cm->tiles.large_scale) {
1742 if (features->tx_mode == TX_MODE_SELECT &&
1743 cpi->td.mb.txfm_search_info.txb_split_count == 0)
1744 features->tx_mode = TX_MODE_LARGEST;
1745 }
1746 } else {
1747 // This is needed if real-time speed setting is changed on the fly
1748 // from one using compound prediction to one using single reference.
1749 if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
1750 current_frame->reference_mode = SINGLE_REFERENCE;
1751 encode_frame_internal(cpi);
1752 }
1753 }
1754