/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_util/aom_pthread.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/partition_search.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

/*!\cond */
// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_AV1_HIGHBITDEPTH
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
#endif  // CONFIG_AV1_HIGHBITDEPTH
/*!\endcond */

// For the given bit depth, returns a constant array used to assist the
// calculation of source block variance, which will then be used to decide
// adaptive quantizers.
static const uint8_t *get_var_offs(int use_hbd, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    assert(bd == 8 || bd == 10 || bd == 12);
    const int off_index = (bd - 8) >> 1;
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                                AV1_HIGH_VAR_OFFS_10,
                                                AV1_HIGH_VAR_OFFS_12 };
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
  }
#else
  (void)use_hbd;
  (void)bd;
  assert(!use_hbd);
#endif
  assert(bd == 8);
  return AV1_VAR_OFFS;
}

void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}

void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}

unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  const int subsampling_x = xd->plane[plane].subsampling_x;
  const int subsampling_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
}
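
// Example (illustration only, not encoder logic): for a BLOCK_16X16 plane
// block, num_pels_log2_lookup[BLOCK_16X16] is 8 (256 pixels), so a block
// variance of 5120 maps to ROUND_POWER_OF_TWO(5120, 8) =
// (5120 + 128) >> 8 = 20 per pixel.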

unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  const int use_hbd = is_cur_buf_hbd(xd);
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

#if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each super
 * block based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi        Top level encoder instance structure
 * \param[in,out] td         Thread data structure
 * \param[in,out] x          Macro block level data for this block.
 * \param[in]     tile_info  Tile information / identification
 * \param[in]     mi_row     Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col     Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes Number of image planes (e.g. Y, U, V)
 *
 * \remark No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                     MACROBLOCK *const x,
                                     const TileInfo *const tile_info,
                                     int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols =
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // Keep track of any non-zero delta-q used.
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned to each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}
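
// Worked example (illustration only): with delta_qindex = -16 and
// delta_lf_res = 4, delta_lf_from_base = (-16 / 4 + 2) & ~3 = -4, i.e. the
// loop-filter delta is roughly a quarter of the qindex delta, rounded to a
// multiple of delta_lf_res and clamped to +/-MAX_LOOP_FILTER.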

static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gain is smaller than that of the previous,
        // more relevant reference frame by a certain amount, discard this
        // frame and all the frames after it.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}
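
// Illustration (hypothetical numbers, not encoder logic): with accumulated
// costs of, say, { -8000, -4000, -900, -90, ... } in rank order, the
// 4th-ranked reference triggers the cutoff because llabs(-90) is smaller
// than llabs(-900) / 8, so it and every lower-ranked reference are removed
// from x->tpl_keep_ref_frame.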

static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                               int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
    const int dr =
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
    x->rdmult = dr;
  }
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_RT_ML_PARTITIONING
// Get a prediction (stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        // NOTE: memset() truncates its fill value to a byte, so fill values
        // such as 128 * 4 and 128 * 16 below do not produce the per-sample
        // mid-gray values one might expect.
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING

#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
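
// These weights are used when averaging row-mt CDF contexts in
// encode_sb_row() below: the running left context gets weight 3 and the
// stored top-right row context gets weight 1, biasing the merged symbol
// statistics toward the current superblock row.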

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock with a pre-determined partition pattern; only minor
 * rd-based searches are allowed to adjust the initial pattern. It is only
 * used by realtime encoding.
 */
static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                       TileDataEnc *tile_data, TokenExtra **tp,
                                       const int mi_row, const int mi_col,
                                       const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // Set a fixed-size partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = BLOCK_64X64;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // Set a variance-based partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip CDEF to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If a 128x128 superblock is used, the flag needs to be set for all four
    // 64x64 sub-blocks.
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}

// This function initializes the stats for encode_rd_sb.
static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // Keep track of any non-zero delta-q used.
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned to each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY
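
// Illustrative numbers (assuming delta_q_res == 4): a key frame sweeps
// delta-qp offsets -20, -16, ..., 16, 20 (11 dry-pass partition searches),
// other frames sweep -12 .. 12; ties in RD cost are broken toward the
// smaller absolute offset.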

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                    TileDataEnc *tile_data, TokenExtra **tp,
                                    const int mi_row, const int mi_col,
                                    const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // Partition search starting from a variance-based partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // Partition search by adjusting a fixed-size partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search.
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check if the cost update levels of mode, coeff and dv symbols are
// tile-level or off.
static AOM_INLINE int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and the cost update frequencies are set to off/tile,
// processing of the current SB can start even before processing of the
// top-right SB is finished. This function checks if it is sufficient to wait
// only for the top SB to finish processing before the current SB starts
// processing.
static AOM_INLINE int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static AOM_INLINE uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                             int mi_col) {
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128 &&
      blk_64x64_col_index + 1 < num_blk_64x64_cols &&
      blk_64x64_row_index + 1 < num_blk_64x64_rows) {
    // Calculate SB source SAD by accumulating the source SAD of the 64x64
    // blocks in the superblock.
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}

/*!\brief Determine whether grading of the source content can be skipped
 * based on SAD stats
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static AOM_INLINE bool is_calc_src_content_needed(AV1_COMP *cpi,
                                                  MACROBLOCK *const x,
                                                  int mi_row, int mi_col) {
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive the average 64x64 block source SAD from the SB source SAD.
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;

    // The thresholds are determined based on the kLowSad and kHighSad
    // thresholds and test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is
      // extended to the RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}
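
// Example (illustration only): for a 128x128 superblock, the average 64x64
// block SAD is (curr_sb_sad + 2) >> 2, i.e. a rounded quarter of the SB SAD;
// with the default thresholds, an average in the open interval
// (15000, 40000) is classified as kMedSad and the full content grading is
// skipped.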

/*!\brief Determine whether grading of the source content is needed based on
 * speed features and frame stats
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static AOM_INLINE void grade_source_content_sb(AV1_COMP *cpi,
                                               MACROBLOCK *const x,
                                               TileDataEnc *tile_data,
                                               int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling
 * up the width of the current tile.
 */
static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                     TileDataEnc *tile_data, int mi_row,
                                     TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset the deltas for the quantizer and loop filters at the beginning of
  // every tile (and of every SB row when row-mt is enabled).
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode, when the frequency of cost updates is
    // off/tile, wait for the top superblock to finish encoding. Otherwise,
    // wait for the top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if (tile_info->mi_col_start == mi_col) {
        // Restore the frame context at the first-column SB.
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // Update the context as a weighted average of the left and the
        // top-right contexts.
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_force_fixed_part = 1;

    if (cpi->oxcf.mode == ALLINTRA) {
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the SB. The grade will be used to
    // decide the fast mode search strategy for coding blocks.
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // Encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
      else if (sb_col_in_tile >= 1)
        memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
               sizeof(*xd->tile_ctx));
    }
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}

static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // Copy data over into macro block data structures.
  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
                       cm->seq_params->sb_size);

  av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
                         cm->seq_params->subsampling_y, num_planes);
}

void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  av1_row_mt_mem_dealloc(cpi);

  aom_free(cpi->tile_data);
  cpi->allocated_tiles = 0;
  enc_row_mt->allocated_tile_cols = 0;
  enc_row_mt->allocated_tile_rows = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = tile_cols * tile_rows;
  enc_row_mt->allocated_tile_cols = tile_cols;
  enc_row_mt->allocated_tile_rows = tile_rows;
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
      const int tile_index = tile_row * tile_cols + tile_col;
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
      av1_zero(this_tile->row_mt_sync);
      this_tile->row_ctx = NULL;
    }
  }
}
1311
av1_init_tile_data(AV1_COMP * cpi)1312 void av1_init_tile_data(AV1_COMP *cpi) {
1313 AV1_COMMON *const cm = &cpi->common;
1314 const int num_planes = av1_num_planes(cm);
1315 const int tile_cols = cm->tiles.cols;
1316 const int tile_rows = cm->tiles.rows;
1317 int tile_col, tile_row;
1318 TokenInfo *const token_info = &cpi->token_info;
1319 TokenExtra *pre_tok = token_info->tile_tok[0][0];
1320 TokenList *tplist = token_info->tplist[0][0];
1321 unsigned int tile_tok = 0;
1322 int tplist_count = 0;
1323
1324 if (!is_stat_generation_stage(cpi) &&
1325 cm->features.allow_screen_content_tools) {
1326 // Number of tokens for which token info needs to be allocated.
1327 unsigned int tokens_required =
1328 get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1329 MAX_SB_SIZE_LOG2, num_planes);
1330 // Allocate/reallocate memory for token related info if the number of tokens
1331 // required is more than the number of tokens already allocated. This could
1332 // occur in case of the following:
1333 // 1) If the memory is not yet allocated
1334 // 2) If the frame dimensions have changed
1335 const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1336 if (realloc_tokens) {
1337 free_token_info(token_info);
1338 alloc_token_info(cm, token_info, tokens_required);
1339 pre_tok = token_info->tile_tok[0][0];
1340 tplist = token_info->tplist[0][0];
1341 }
1342 }
1343
1344 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1345 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1346 TileDataEnc *const tile_data =
1347 &cpi->tile_data[tile_row * tile_cols + tile_col];
1348 TileInfo *const tile_info = &tile_data->tile_info;
1349 av1_tile_init(tile_info, cm, tile_row, tile_col);
1350 tile_data->firstpass_top_mv = kZeroMv;
1351 tile_data->abs_sum_level = 0;
1352
1353 if (is_token_info_allocated(token_info)) {
1354 token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1355 pre_tok = token_info->tile_tok[tile_row][tile_col];
1356 tile_tok = allocated_tokens(
1357 tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1358 num_planes);
1359 token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1360 tplist = token_info->tplist[tile_row][tile_col];
1361 tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1362 }
1363 tile_data->allow_update_cdf = !cm->tiles.large_scale;
1364 tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1365 !cm->features.disable_cdf_update &&
1366 !delay_wait_for_top_right_sb(cpi);
1367 tile_data->tctx = *cm->fc;
1368 }
1369 }
1370 }
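// Illustrative sketch (not libaom API) of the grow-only reallocation idiom
// used for the token buffers above: the buffer is re-created only when the
// new requirement exceeds the current capacity, so steady-state encoding of
// a fixed frame size performs no per-frame allocations. ensure_capacity()
// and its signature are assumptions for illustration only (assumes
// <stdlib.h> for malloc/free).
#if 0
static int ensure_capacity(uint8_t **buf, size_t *capacity, size_t required) {
  if (required <= *capacity) return 0;  // Fast path: current buffer suffices.
  free(*buf);                           // Grow-only: discard, then re-create.
  *buf = (uint8_t *)malloc(required);
  if (*buf == NULL) {
    *capacity = 0;
    return -1;  // Allocation failure; caller reports the error.
  }
  *capacity = required;
  return 0;
}
#endif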
1371
1372 // Populate the start palette token info prior to encoding an SB row.
1373 static AOM_INLINE void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1374 int tile_row, int tile_col, int mi_row,
1375 TokenExtra **tp) {
1376 const TokenInfo *token_info = &cpi->token_info;
1377 if (!is_token_info_allocated(token_info)) return;
1378
1379 const AV1_COMMON *cm = &cpi->common;
1380 const int num_planes = av1_num_planes(cm);
1381 TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1382 const int sb_row_in_tile =
1383 (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1384
1385 get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1386 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1387 assert(tplist != NULL);
1388 tplist[sb_row_in_tile].start = *tp;
1389 }
1390
1391 // Populate the token count after encoding an SB row.
1392 static AOM_INLINE void populate_token_count(AV1_COMP *cpi,
1393 const TileInfo *tile_info,
1394 int tile_row, int tile_col,
1395 int mi_row, TokenExtra *tok) {
1396 const TokenInfo *token_info = &cpi->token_info;
1397 if (!is_token_info_allocated(token_info)) return;
1398
1399 const AV1_COMMON *cm = &cpi->common;
1400 const int num_planes = av1_num_planes(cm);
1401 TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1402 const int sb_row_in_tile =
1403 (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1404 const int tile_mb_cols =
1405 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1406 const int num_mb_rows_in_sb =
1407 ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1408 tplist[sb_row_in_tile].count =
1409 (unsigned int)(tok - tplist[sb_row_in_tile].start);
1410
1411 assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1412 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1413 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1414 num_planes));
1415
1416 (void)num_planes;
1417 (void)tile_mb_cols;
1418 (void)num_mb_rows_in_sb;
1419 }
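// The start/count pair recorded above brackets the tokens of one SB row, so
// a downstream consumer (the bitstream packing stage) can walk each row's
// tokens independently. A minimal sketch, assuming only the TokenList
// fields used in this file (illustrative; not the actual packing code):
#if 0
static void visit_sb_row_tokens(const TokenInfo *token_info, int tile_row,
                                int tile_col, int sb_row_in_tile) {
  const TokenList *tl =
      &token_info->tplist[tile_row][tile_col][sb_row_in_tile];
  for (unsigned int t = 0; t < tl->count; ++t) {
    const TokenExtra *tok = &tl->start[t];
    (void)tok;  // e.g. write *tok to the bitstream here
  }
}
#endif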
1420
1421 /*!\brief Encode a superblock row
1422 *
1423 * \ingroup partition_search
1424 */
1425 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1426 int tile_col, int mi_row) {
1427 AV1_COMMON *const cm = &cpi->common;
1428 const int tile_cols = cm->tiles.cols;
1429 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1430 const TileInfo *const tile_info = &this_tile->tile_info;
1431 TokenExtra *tok = NULL;
1432
1433 get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1434
1435 encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1436
1437 populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1438 }
1439
1440 /*!\brief Encode a tile
1441 *
1442 * \ingroup partition_search
1443 */
1444 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1445 int tile_col) {
1446 AV1_COMMON *const cm = &cpi->common;
1447 TileDataEnc *const this_tile =
1448 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1449 const TileInfo *const tile_info = &this_tile->tile_info;
1450
1451 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1452
1453 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1454 tile_info->mi_col_end, tile_row);
1455 av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1456 &td->mb.e_mbd);
1457
1458 if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1459 cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1460
1461 if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1462 av1_crc32c_calculator_init(
1463 &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1464 }
1465
1466 for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1467 mi_row += cm->seq_params->mib_size) {
1468 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1469 }
1470 this_tile->abs_sum_level = td->abs_sum_level;
1471 }
1472
1473 /*!\brief Break one frame into tiles and encode the tiles
1474 *
1475 * \ingroup partition_search
1476 *
1477 * \param[in] cpi Top-level encoder structure
1478 */
1479 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
1480 AV1_COMMON *const cm = &cpi->common;
1481 const int tile_cols = cm->tiles.cols;
1482 const int tile_rows = cm->tiles.rows;
1483 int tile_col, tile_row;
1484
1485 MACROBLOCK *const mb = &cpi->td.mb;
1486 assert(IMPLIES(cpi->tile_data == NULL,
1487 cpi->allocated_tiles < tile_cols * tile_rows));
1488 if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1489
1490 av1_init_tile_data(cpi);
1491 av1_alloc_mb_data(cpi, mb);
1492
1493 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1494 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1495 TileDataEnc *const this_tile =
1496 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1497 cpi->td.intrabc_used = 0;
1498 cpi->td.deltaq_used = 0;
1499 cpi->td.abs_sum_level = 0;
1500 cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1501 cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1502 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1503 cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1504 av1_init_rtc_counters(&cpi->td.mb);
1505 cpi->td.mb.palette_pixels = 0;
1506 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1507 if (!frame_is_intra_only(&cpi->common))
1508 av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1509 cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1510 cpi->intrabc_used |= cpi->td.intrabc_used;
1511 cpi->deltaq_used |= cpi->td.deltaq_used;
1512 }
1513 }
1514
1515 av1_dealloc_mb_data(mb, av1_num_planes(cm));
1516 }
1517
1518 // Set the relative distance of each reference frame w.r.t. the current frame.
1519 static AOM_INLINE void set_rel_frame_dist(
1520 const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1521 const int ref_frame_flags) {
1522 MV_REFERENCE_FRAME ref_frame;
1523 int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1524 ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1525 ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1526 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1527 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1528 if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1529 int dist = av1_encoder_get_relative_dist(
1530 cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1531 cm->current_frame.display_order_hint);
1532 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1533 // Get the nearest ref_frame in the past
1534 if (abs(dist) < min_past_dist && dist < 0) {
1535 ref_frame_dist_info->nearest_past_ref = ref_frame;
1536 min_past_dist = abs(dist);
1537 }
1538 // Get the nearest ref_frame in the future
1539 if (dist < min_future_dist && dist > 0) {
1540 ref_frame_dist_info->nearest_future_ref = ref_frame;
1541 min_future_dist = dist;
1542 }
1543 }
1544 }
1545 }
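// Worked example of the sign convention above (dist = ref_hint - cur_hint,
// so dist < 0 means the reference precedes the current frame in display
// order): with the current frame at display order 10 and enabled references
// at 8, 9 and 12, the relative distances are -2, -1 and +2;
// nearest_past_ref is the reference at 9 (|dist| == 1) and
// nearest_future_ref is the one at 12.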
1546
1547 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
1548 assert(!frame_is_intra_only(cm));
1549
1550 int one_sided_refs = 1;
1551 const int cur_display_order_hint = cm->current_frame.display_order_hint;
1552 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1553 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1554 if (buf == NULL) continue;
1555 if (av1_encoder_get_relative_dist(buf->display_order_hint,
1556 cur_display_order_hint) > 0) {
1557 one_sided_refs = 0; // bwd reference
1558 break;
1559 }
1560 }
1561 return one_sided_refs;
1562 }
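// Example: with the current frame at display order 10 and references at 5,
// 7 and 9, every relative distance is negative, so the references are
// one-sided (all in the past) and the function returns 1; a single future
// reference, say at 12, makes it return 0.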
1563
1564 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1565 int ref_order_hint[2]) {
1566 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1567 ref_order_hint[0] = ref_order_hint[1] = 0;
1568 if (!skip_mode_info->skip_mode_allowed) return;
1569
1570 const RefCntBuffer *const buf_0 =
1571 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1572 const RefCntBuffer *const buf_1 =
1573 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1574 assert(buf_0 != NULL && buf_1 != NULL);
1575
1576 ref_order_hint[0] = buf_0->order_hint;
1577 ref_order_hint[1] = buf_1->order_hint;
1578 }
1579
1580 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1581 AV1_COMMON *const cm = &cpi->common;
1582
1583 av1_setup_skip_mode_allowed(cm);
1584 if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1585
1586 // Turn off skip mode if the temporal distances of the reference pair to the
1587 // current frame are different by more than 1 frame.
1588 const int cur_offset = (int)cm->current_frame.order_hint;
1589 int ref_offset[2];
1590 get_skip_mode_ref_offsets(cm, ref_offset);
1591 const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1592 cur_offset, ref_offset[0]);
1593 const int cur_to_ref1 = abs(get_relative_dist(
1594 &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1595 if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
1596
1597 // High latency: turn off skip mode if all references are forward (one-sided).
1598 if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1599
1600 const int ref_frame[2] = {
1601 cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1602 cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1603 };
1604 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1605 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1606 return 0;
1607
1608 return 1;
1609 }
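// Worked example of the distance check above: with the current frame at
// order hint 8, a reference pair at (7, 9) gives
// |cur_to_ref0 - cur_to_ref1| = |(8 - 7) - |8 - 9|| = 0, so skip mode stays
// enabled; a pair at (5, 9) gives |3 - 1| = 2 > 1, so skip mode is disabled.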
1610
1611 static AOM_INLINE void set_default_interp_skip_flags(
1612 const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1613 const int num_planes = av1_num_planes(cm);
1614 interp_search_flags->default_interp_skip_flags =
1615 (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1616 : INTERP_SKIP_LUMA_SKIP_CHROMA;
1617 }
1618
1619 static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1620 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1621 cpi->sf.inter_sf.disable_onesided_comp) &&
1622 cpi->all_one_sided_refs) {
1623 // Disable all compound references: set every bit in [REF_FRAMES, MODE_CTX_REF_FRAMES).
1624 cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1625 } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1626 cpi->sf.inter_sf.selective_ref_frame >= 2) {
1627 AV1_COMMON *const cm = &cpi->common;
1628 const int cur_frame_display_order_hint =
1629 cm->current_frame.display_order_hint;
1630 unsigned int *ref_display_order_hint =
1631 cm->cur_frame->ref_display_order_hint;
1632 const int arf2_dist = av1_encoder_get_relative_dist(
1633 ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1634 cur_frame_display_order_hint);
1635 const int bwd_dist = av1_encoder_get_relative_dist(
1636 ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1637 cur_frame_display_order_hint);
1638
1639 for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1640 MV_REFERENCE_FRAME rf[2];
1641 av1_set_ref_frame(rf, ref_idx);
1642 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1643 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1644 continue;
1645 }
1646
1647 if (!cpi->all_one_sided_refs) {
1648 int ref_dist[2];
1649 for (int i = 0; i < 2; ++i) {
1650 ref_dist[i] = av1_encoder_get_relative_dist(
1651 ref_display_order_hint[rf[i] - LAST_FRAME],
1652 cur_frame_display_order_hint);
1653 }
1654
1655 // One-sided compound is used only when all reference frames are
1656 // one-sided.
1657 if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1658 cpi->prune_ref_frame_mask |= 1 << ref_idx;
1659 }
1660 }
1661
1662 if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1663 (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1664 (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1665 // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1666 if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1667 // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1668 // reference to the current frame than ALTREF2_FRAME
1669 cpi->prune_ref_frame_mask |= 1 << ref_idx;
1670 }
1671 }
1672 }
1673 }
1674 }
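// Each bit set in cpi->prune_ref_frame_mask corresponds to one compound
// reference pair index in [REF_FRAMES, MODE_CTX_REF_FRAMES); the mode
// search is expected to skip any pair whose bit is set. For instance, when
// all references are one-sided, the first branch above sets every bit in
// that range, disabling compound prediction for this frame.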
1675
1676 static int allow_deltaq_mode(AV1_COMP *cpi) {
1677 #if !CONFIG_REALTIME_ONLY
1678 AV1_COMMON *const cm = &cpi->common;
1679 BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1680 int sbs_wide = mi_size_wide[sb_size];
1681 int sbs_high = mi_size_high[sb_size];
1682
1683 int64_t delta_rdcost = 0;
1684 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1685 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1686 int64_t this_delta_rdcost = 0;
1687 av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1688 mi_row, mi_col);
1689 delta_rdcost += this_delta_rdcost;
1690 }
1691 }
1692 return delta_rdcost < 0;
1693 #else
1694 (void)cpi;
1695 return 1;
1696 #endif // !CONFIG_REALTIME_ONLY
1697 }
1698
1699 #define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1700 #define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1701
1702 // Populates block-level thresholds for the force zeromv-skip decision.
1703 static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1704 if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1705
1706 // The thresholds for forcing the zeromv-skip decision are as follows:
1707 // For 128x128 blocks, the threshold is 10000 (0.6103 per pixel).
1708 // For 64x64 blocks, the threshold is 5000 (1.221 per pixel),
1709 // allowing slightly higher error for smaller blocks.
1710 // The per-pixel thresholds scale with the square root of the block area:
1711 //   per_pixel_thresh(64x64) / per_pixel_thresh(128x128)
1712 //       = sqrt(area(64x64) / area(128x128)) = sqrt(1/4) = 1/2
1713 // Thus the per-pixel thresholds for 32x32, 16x16, ... blocks would be
1714 // 2.442, 4.884, .... Since the per-pixel error tends to be higher for
1715 // small blocks, the per-pixel threshold is capped at 4.
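// Worked example: a 64x64 block has 4096 pixels, so its threshold is
// 10000 * sqrt(4096 / 16384) = 5000 (1.221 per pixel, under the cap of 4).
// A 16x16 block has 256 pixels: 10000 * sqrt(256 / 16384) = 1250, but the
// per-pixel cap allows only 4 * 256 = 1024, so 1024 is stored.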
1716 const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1717 const int num_128x128_pix =
1718 block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1719
1720 for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1721 const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1722
1723 // Calculate the threshold for zeromv-skip decision based on area of the
1724 // partition
1725 unsigned int thresh_exit_part_blk =
1726 (unsigned int)(thresh_exit_128x128_part *
1727 sqrt((double)num_block_pix / num_128x128_pix) +
1728 0.5);
1729 thresh_exit_part_blk = AOMMIN(
1730 thresh_exit_part_blk,
1731 (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1732 cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1733 }
1734 }
1735
1736 static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
1737 int8_t *is_block_same[2][3]) {
1738 for (int k = 0; k < 2; ++k) {
1739 for (int j = 0; j < 2; ++j) {
1740 aom_free(block_hash_values[k][j]);
1741 }
1742
1743 for (int j = 0; j < 3; ++j) {
1744 aom_free(is_block_same[k][j]);
1745 }
1746 }
1747 }
1748
1749 /*!\brief Encoder setup (for the current frame only), encoding, and
1750  * reconstruction of a single frame
1751 *
1752 * \ingroup high_level_algo
1753 */
1754 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
1755 ThreadData *const td = &cpi->td;
1756 MACROBLOCK *const x = &td->mb;
1757 AV1_COMMON *const cm = &cpi->common;
1758 CommonModeInfoParams *const mi_params = &cm->mi_params;
1759 FeatureFlags *const features = &cm->features;
1760 MACROBLOCKD *const xd = &x->e_mbd;
1761 RD_COUNTS *const rdc = &cpi->td.rd_counts;
1762 #if CONFIG_FPMT_TEST
1763 FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1764 FrameProbInfo *const temp_frame_probs_simulation =
1765 &cpi->ppi->temp_frame_probs_simulation;
1766 #endif
1767 FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1768 IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1769 MultiThreadInfo *const mt_info = &cpi->mt_info;
1770 AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1771 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1772 const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1773 int i;
1774
1775 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1776 mi_params->setup_mi(mi_params);
1777 }
1778
1779 set_mi_offsets(mi_params, xd, 0, 0);
1780
1781 av1_zero(*td->counts);
1782 av1_zero(rdc->tx_type_used);
1783 av1_zero(rdc->obmc_used);
1784 av1_zero(rdc->warped_used);
1785 av1_zero(rdc->seg_tmp_pred_cost);
1786
1787 // Reset the flag.
1788 cpi->intrabc_used = 0;
1789 // Need to disable intrabc when superres is selected
1790 if (av1_superres_scaled(cm)) {
1791 features->allow_intrabc = 0;
1792 }
1793
1794 features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1795
1796 if (features->allow_warped_motion &&
1797 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1798 const FRAME_UPDATE_TYPE update_type =
1799 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1800 int warped_probability =
1801 #if CONFIG_FPMT_TEST
1802 cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1803 ? temp_frame_probs->warped_probs[update_type]
1804 :
1805 #endif // CONFIG_FPMT_TEST
1806 frame_probs->warped_probs[update_type];
1807 if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1808 features->allow_warped_motion = 0;
1809 }
1810
1811 int hash_table_created = 0;
1812 if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1813 !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1814 // TODO(any): move this outside of the recoding loop to avoid recalculating
1815 // the hash table.
1816 // add to hash table
1817 const int pic_width = cpi->source->y_crop_width;
1818 const int pic_height = cpi->source->y_crop_height;
1819 uint32_t *block_hash_values[2][2] = { { NULL } };
1820 int8_t *is_block_same[2][3] = { { NULL } };
1821 int k, j;
1822 bool error = false;
1823
1824 for (k = 0; k < 2 && !error; ++k) {
1825 for (j = 0; j < 2; ++j) {
1826 block_hash_values[k][j] = (uint32_t *)aom_malloc(
1827 sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1828 if (!block_hash_values[k][j]) {
1829 error = true;
1830 break;
1831 }
1832 }
1833
1834 for (j = 0; j < 3 && !error; ++j) {
1835 is_block_same[k][j] = (int8_t *)aom_malloc(
1836 sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1837 if (!is_block_same[k][j]) error = true;
1838 }
1839 }
1840
1841 av1_hash_table_init(intrabc_hash_info);
1842 if (error ||
1843 !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1844 free_block_hash_buffers(block_hash_values, is_block_same);
1845 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1846 "Error allocating intrabc_hash_table and buffers");
1847 }
1848 hash_table_created = 1;
1849 av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1850 block_hash_values[0], is_block_same[0]);
1851 // Hash data generated for screen contents is used for intraBC ME
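// The loop below builds the hash pyramid bottom-up with two ping-pong
// buffers: block_hash_values[src_idx] holds the hashes of size/2 x size/2
// blocks, from which av1_generate_block_hash_value() derives the hashes of
// size x size blocks into block_hash_values[dst_idx]; the indices then swap
// for the next (doubled) block size. Only sizes from min_alloc_size up to
// the superblock size are inserted into the hash map.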
1852 const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1853 const int max_sb_size =
1854 (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1855 int src_idx = 0;
1856 for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1857 const int dst_idx = !src_idx;
1858 av1_generate_block_hash_value(
1859 intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1860 block_hash_values[dst_idx], is_block_same[src_idx],
1861 is_block_same[dst_idx]);
1862 if (size >= min_alloc_size) {
1863 if (!av1_add_to_hash_map_by_row_with_precal_data(
1864 &intrabc_hash_info->intrabc_hash_table,
1865 block_hash_values[dst_idx], is_block_same[dst_idx][2],
1866 pic_width, pic_height, size)) {
1867 error = true;
1868 break;
1869 }
1870 }
1871 }
1872
1873 free_block_hash_buffers(block_hash_values, is_block_same);
1874
1875 if (error) {
1876 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1877 "Error adding data to intrabc_hash_table");
1878 }
1879 }
1880
1881 const CommonQuantParams *quant_params = &cm->quant_params;
1882 for (i = 0; i < MAX_SEGMENTS; ++i) {
1883 const int qindex =
1884 cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1885 : quant_params->base_qindex;
1886 xd->lossless[i] =
1887 qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1888 quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1889 quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1890 if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1891 xd->qindex[i] = qindex;
1892 if (xd->lossless[i]) {
1893 cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1894 } else {
1895 cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1896 }
1897 }
1898 features->coded_lossless = is_coded_lossless(cm, xd);
1899 features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1900
1901 // Fix delta q resolution for the moment.
1903 cm->delta_q_info.delta_q_res = 0;
1904 if (cpi->use_ducky_encode) {
1905 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1906 } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1907 if (deltaq_mode == DELTA_Q_OBJECTIVE)
1908 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1909 else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1910 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1911 else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1912 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1913 else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1914 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1915 else if (deltaq_mode == DELTA_Q_HDR)
1916 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1917 // Set delta_q_present_flag before it is used for the first time
1918 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1919 cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1920
1921 // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1922 // is used for ineligible frames. That effectively will turn off row_mt
1923 // usage. Note objective delta_q and tpl eligible frames are only altref
1924 // frames currently.
1925 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1926 if (cm->delta_q_info.delta_q_present_flag) {
1927 if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1928 gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1929 cm->delta_q_info.delta_q_present_flag = 0;
1930
1931 if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1932 cm->delta_q_info.delta_q_present_flag) {
1933 cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1934 }
1935 }
1936
1937 // Reset delta_q_used flag
1938 cpi->deltaq_used = 0;
1939
1940 cm->delta_q_info.delta_lf_present_flag =
1941 cm->delta_q_info.delta_q_present_flag &&
1942 oxcf->tool_cfg.enable_deltalf_mode;
1943 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
1944
1945 // update delta_q_present_flag and delta_lf_present_flag based on
1946 // base_qindex
1947 cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
1948 cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
1949 } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
1950 cpi->svc.number_temporal_layers == 1) {
1951 cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
1952 cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
1953 }
1954 cpi->rc.cnt_zeromv = 0;
1955
1956 av1_frame_init_quantizer(cpi);
1957 init_encode_frame_mb_context(cpi);
1958 set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
1959
1960 if (cm->prev_frame && cm->prev_frame->seg.enabled)
1961 cm->last_frame_seg_map = cm->prev_frame->seg_map;
1962 else
1963 cm->last_frame_seg_map = NULL;
1964 if (features->allow_intrabc || features->coded_lossless) {
1965 av1_set_default_ref_deltas(cm->lf.ref_deltas);
1966 av1_set_default_mode_deltas(cm->lf.mode_deltas);
1967 } else if (cm->prev_frame) {
1968 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
1969 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
1970 }
1971 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
1972 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
1973
1974 cpi->all_one_sided_refs =
1975 frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
1976
1977 cpi->prune_ref_frame_mask = 0;
1978 // Figure out which ref frames can be skipped at frame level.
1979 setup_prune_ref_frame_mask(cpi);
1980
1981 x->txfm_search_info.txb_split_count = 0;
1982 #if CONFIG_SPEED_STATS
1983 x->txfm_search_info.tx_search_count = 0;
1984 #endif // CONFIG_SPEED_STATS
1985
1986 #if !CONFIG_REALTIME_ONLY
1987 #if CONFIG_COLLECT_COMPONENT_TIMING
1988 start_timing(cpi, av1_compute_global_motion_time);
1989 #endif
1990 av1_compute_global_motion_facade(cpi);
1991 #if CONFIG_COLLECT_COMPONENT_TIMING
1992 end_timing(cpi, av1_compute_global_motion_time);
1993 #endif
1994 #endif // !CONFIG_REALTIME_ONLY
1995
1996 #if CONFIG_COLLECT_COMPONENT_TIMING
1997 start_timing(cpi, av1_setup_motion_field_time);
1998 #endif
1999 av1_calculate_ref_frame_side(cm);
2000 if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2001 #if CONFIG_COLLECT_COMPONENT_TIMING
2002 end_timing(cpi, av1_setup_motion_field_time);
2003 #endif
2004
2005 cm->current_frame.skip_mode_info.skip_mode_flag =
2006 check_skip_mode_enabled(cpi);
2007
2008 // Initialization of skip mode cost depends on the value of
2009 // 'skip_mode_flag'. This initialization happens in the function
2010 // av1_fill_mode_rates(), which is in turn called in
2011 // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2012 // has to be called after 'skip_mode_flag' is initialized.
2013 av1_initialize_rd_consts(cpi);
2014 av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2015 populate_thresh_to_force_zeromv_skip(cpi);
2016
2017 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2018 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2019 mt_info->row_mt_enabled = 0;
2020 mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2021 cm->tiles.cols * cm->tiles.rows) > 1;
2022
2023 if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2024 mt_info->row_mt_enabled = 1;
2025 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2026 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2027 av1_encode_tiles_row_mt(cpi);
2028 } else {
2029 if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2030 av1_encode_tiles_mt(cpi);
2031 } else {
2032 // Preallocate the pc_tree for realtime coding to reduce the cost of
2033 // memory allocation.
2034 const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2035 if (use_nonrd_mode) {
2036 td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2037 if (!td->pc_root)
2038 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2039 "Failed to allocate PC_TREE");
2040 } else {
2041 td->pc_root = NULL;
2042 }
2043
2044 encode_tiles(cpi);
2045 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2046 cpi->sf.part_sf.partition_search_type);
2047 td->pc_root = NULL;
2048 }
2049 }
2050
2051 // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2052 if (features->allow_intrabc && !cpi->intrabc_used) {
2053 features->allow_intrabc = 0;
2054 }
2055 if (features->allow_intrabc) {
2056 cm->delta_q_info.delta_lf_present_flag = 0;
2057 }
2058
2059 if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2060 cm->delta_q_info.delta_q_present_flag = 0;
2061 }
2062
2063 // Set the transform size appropriately before bitstream creation
2064 const MODE_EVAL_TYPE eval_type =
2065 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2066 ? WINNER_MODE_EVAL
2067 : DEFAULT_EVAL;
2068 const TX_SIZE_SEARCH_METHOD tx_search_type =
2069 cpi->winner_mode_params.tx_size_search_methods[eval_type];
2070 assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2071 features->tx_mode = select_tx_mode(cm, tx_search_type);
2072
2073 // Retain the frame level probability update conditions for parallel frames.
2074 // These conditions will be consumed during postencode stage to update the
2075 // probability.
2076 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2077 cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2078 cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2079 cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2080 (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2081 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2082 cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2083 (features->allow_warped_motion &&
2084 cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2085 cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2086 (cm->current_frame.frame_type != KEY_FRAME &&
2087 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2088 features->interp_filter == SWITCHABLE);
2089 }
2090
2091 if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2092 ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2093 INT_MAX) &&
2094 (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2095 const FRAME_UPDATE_TYPE update_type =
2096 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2097 for (i = 0; i < TX_SIZES_ALL; i++) {
2098 int sum = 0;
2099 int j;
2100 int left = MAX_TX_TYPE_PROB;
2101
2102 for (j = 0; j < TX_TYPES; j++)
2103 sum += cpi->td.rd_counts.tx_type_used[i][j];
2104
2105 for (j = TX_TYPES - 1; j >= 0; j--) {
2106 int update_txtype_frameprobs = 1;
2107 const int new_prob =
2108 sum ? MAX_TX_TYPE_PROB * cpi->td.rd_counts.tx_type_used[i][j] / sum
2109 : (j ? 0 : MAX_TX_TYPE_PROB);
2110 #if CONFIG_FPMT_TEST
2111 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2112 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2113 0) {
2114 int prob =
2115 (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2116 new_prob) >>
2117 1;
2118 left -= prob;
2119 if (j == 0) prob += left;
2120 temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2121 prob;
2122 // Copy temp_frame_probs_simulation to temp_frame_probs
2123 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2124 update_type_idx++) {
2125 temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2126 temp_frame_probs_simulation
2127 ->tx_type_probs[update_type_idx][i][j];
2128 }
2129 }
2130 update_txtype_frameprobs = 0;
2131 }
2132 #endif // CONFIG_FPMT_TEST
2133 // Track the frame probabilities of parallel encode frames to update
2134 // during postencode stage.
2135 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2136 update_txtype_frameprobs = 0;
2137 cpi->frame_new_probs[cpi->num_frame_recode]
2138 .tx_type_probs[update_type][i][j] = new_prob;
2139 }
2140 if (update_txtype_frameprobs) {
2141 int prob =
2142 (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2143 left -= prob;
2144 if (j == 0) prob += left;
2145 frame_probs->tx_type_probs[update_type][i][j] = prob;
2146 }
2147 }
2148 }
2149 }
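// The update above blends the old and new probabilities with a one-tap
// average, prob = (old + new) >> 1, walking j from high to low and tracking
// the unassigned mass in 'left' so that rounding never breaks the invariant
// that each row sums to the total probability mass. Worked example with a
// total mass of 1024 and two types, old = {601, 423}, new = {512, 512}:
//   j = 1: prob = (423 + 512) >> 1 = 467, left = 1024 - 467 = 557
//   j = 0: prob = (601 + 512) >> 1 = 556, left = 557 - 556 = 1,
//          prob += left -> 557
// The final row {557, 467} sums to 1024 exactly. The switchable-filter
// update below renormalizes the same way; the obmc and warped-motion
// updates use the plain one-tap average.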
2150
2151 if (cm->seg.enabled) {
2152 cm->seg.temporal_update = 1;
2153 if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2154 cm->seg.temporal_update = 0;
2155 }
2156
2157 if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2158 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2159 const FRAME_UPDATE_TYPE update_type =
2160 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2161
2162 for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2163 int sum = 0;
2164 int update_obmc_frameprobs = 1;
2165 for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2166
2167 const int new_prob =
2168 sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2169 #if CONFIG_FPMT_TEST
2170 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2171 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2172 temp_frame_probs_simulation->obmc_probs[update_type][i] =
2173 (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2174 new_prob) >>
2175 1;
2176 // Copy temp_frame_probs_simulation to temp_frame_probs
2177 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2178 update_type_idx++) {
2179 temp_frame_probs->obmc_probs[update_type_idx][i] =
2180 temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2181 }
2182 }
2183 update_obmc_frameprobs = 0;
2184 }
2185 #endif // CONFIG_FPMT_TEST
2186 // Track the frame probabilities of parallel encode frames to update
2187 // during postencode stage.
2188 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2189 update_obmc_frameprobs = 0;
2190 cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2191 new_prob;
2192 }
2193 if (update_obmc_frameprobs) {
2194 frame_probs->obmc_probs[update_type][i] =
2195 (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2196 }
2197 }
2198 }
2199
2200 if (features->allow_warped_motion &&
2201 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2202 const FRAME_UPDATE_TYPE update_type =
2203 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2204 int update_warp_frameprobs = 1;
2205 int sum = 0;
2206 for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2207 const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2208 #if CONFIG_FPMT_TEST
2209 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2210 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2211 temp_frame_probs_simulation->warped_probs[update_type] =
2212 (temp_frame_probs_simulation->warped_probs[update_type] +
2213 new_prob) >>
2214 1;
2215 // Copy temp_frame_probs_simulation to temp_frame_probs
2216 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2217 update_type_idx++) {
2218 temp_frame_probs->warped_probs[update_type_idx] =
2219 temp_frame_probs_simulation->warped_probs[update_type_idx];
2220 }
2221 }
2222 update_warp_frameprobs = 0;
2223 }
2224 #endif // CONFIG_FPMT_TEST
2225 // Track the frame probabilities of parallel encode frames to update
2226 // during postencode stage.
2227 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2228 update_warp_frameprobs = 0;
2229 cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2230 new_prob;
2231 }
2232 if (update_warp_frameprobs) {
2233 frame_probs->warped_probs[update_type] =
2234 (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2235 }
2236 }
2237
2238 if (cm->current_frame.frame_type != KEY_FRAME &&
2239 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2240 features->interp_filter == SWITCHABLE) {
2241 const FRAME_UPDATE_TYPE update_type =
2242 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2243
2244 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2245 int sum = 0;
2246 int j;
2247 int left = 1536;
2248
2249 for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2250 sum += cpi->td.counts->switchable_interp[i][j];
2251 }
2252
2253 for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2254 int update_interpfilter_frameprobs = 1;
2255 const int new_prob =
2256 sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2257 : (j ? 0 : 1536);
2258 #if CONFIG_FPMT_TEST
2259 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2260 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2261 0) {
2262 int prob = (temp_frame_probs_simulation
2263 ->switchable_interp_probs[update_type][i][j] +
2264 new_prob) >>
2265 1;
2266 left -= prob;
2267 if (j == 0) prob += left;
2268 temp_frame_probs_simulation
2269 ->switchable_interp_probs[update_type][i][j] = prob;
2270 // Copy temp_frame_probs_simulation to temp_frame_probs
2271 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2272 update_type_idx++) {
2273 temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2274 temp_frame_probs_simulation
2275 ->switchable_interp_probs[update_type_idx][i][j];
2276 }
2277 }
2278 update_interpfilter_frameprobs = 0;
2279 }
2280 #endif // CONFIG_FPMT_TEST
2281 // Track the frame probabilities of parallel encode frames to update
2282 // during postencode stage.
2283 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2284 update_interpfilter_frameprobs = 0;
2285 cpi->frame_new_probs[cpi->num_frame_recode]
2286 .switchable_interp_probs[update_type][i][j] = new_prob;
2287 }
2288 if (update_interpfilter_frameprobs) {
2289 int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2290 new_prob) >>
2291 1;
2292 left -= prob;
2293 if (j == 0) prob += left;
2294 frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2295 }
2296 }
2297 }
2298 }
2299 if (hash_table_created) {
2300 av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2301 }
2302 }
2303
2304 /*!\brief Setup reference frame buffers and encode a frame
2305 *
2306 * \ingroup high_level_algo
2307 * \callgraph
2308 * \callergraph
2309 *
2310 * \param[in] cpi Top-level encoder structure
2311 */
2312 void av1_encode_frame(AV1_COMP *cpi) {
2313 AV1_COMMON *const cm = &cpi->common;
2314 CurrentFrame *const current_frame = &cm->current_frame;
2315 FeatureFlags *const features = &cm->features;
2316 RD_COUNTS *const rdc = &cpi->td.rd_counts;
2317 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2318 // Indicates whether or not to use a default reduced set for ext-tx
2319 // rather than the potential full set of 16 transforms
2320 features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2321
2322 // Make sure segment_id is no larger than last_active_segid.
2323 if (cm->seg.enabled && cm->seg.update_map) {
2324 const int mi_rows = cm->mi_params.mi_rows;
2325 const int mi_cols = cm->mi_params.mi_cols;
2326 const int last_active_segid = cm->seg.last_active_segid;
2327 uint8_t *map = cpi->enc_seg.map;
2328 for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2329 for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2330 map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2331 }
2332 map += mi_cols;
2333 }
2334 }
2335
2336 av1_setup_frame_buf_refs(cm);
2337 enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2338 cm->cur_frame->ref_display_order_hint,
2339 cm->current_frame.display_order_hint);
2340 set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2341 cpi->ref_frame_flags);
2342 av1_setup_frame_sign_bias(cm);
2343
2344 // If global motion is enabled, then every buffer which is used as either
2345 // a source or a ref frame should have an image pyramid allocated.
2346 // Check here so that issues can be caught early in debug mode
2347 #if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2348 if (cpi->alloc_pyramid) {
2349 assert(cpi->source->y_pyramid);
2350 for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2351 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2352 if (buf != NULL) {
2353 assert(buf->buf.y_pyramid);
2354 }
2355 }
2356 }
2357 #endif // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2358
2359 #if CONFIG_MISMATCH_DEBUG
2360 mismatch_reset_frame(av1_num_planes(cm));
2361 #endif
2362
2363 rdc->newmv_or_intra_blocks = 0;
2364 cpi->palette_pixel_num = 0;
2365
2366 if (cpi->sf.hl_sf.frame_parameter_update ||
2367 cpi->sf.rt_sf.use_comp_ref_nonrd) {
2368 if (frame_is_intra_only(cm))
2369 current_frame->reference_mode = SINGLE_REFERENCE;
2370 else
2371 current_frame->reference_mode = REFERENCE_MODE_SELECT;
2372
2373 features->interp_filter = SWITCHABLE;
2374 if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2375
2376 features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2377 features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2378
2379 rdc->compound_ref_used_flag = 0;
2380 rdc->skip_mode_used_flag = 0;
2381
2382 encode_frame_internal(cpi);
2383
2384 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2385 // Use a flag that includes 4x4 blocks
2386 if (rdc->compound_ref_used_flag == 0) {
2387 current_frame->reference_mode = SINGLE_REFERENCE;
2388 #if CONFIG_ENTROPY_STATS
2389 av1_zero(cpi->td.counts->comp_inter);
2390 #endif // CONFIG_ENTROPY_STATS
2391 }
2392 }
2393 // Re-check on the skip mode status as reference mode may have been
2394 // changed.
2395 SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2396 if (frame_is_intra_only(cm) ||
2397 current_frame->reference_mode == SINGLE_REFERENCE) {
2398 skip_mode_info->skip_mode_allowed = 0;
2399 skip_mode_info->skip_mode_flag = 0;
2400 }
2401 if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2402 skip_mode_info->skip_mode_flag = 0;
2403
2404 if (!cm->tiles.large_scale) {
2405 if (features->tx_mode == TX_MODE_SELECT &&
2406 cpi->td.mb.txfm_search_info.txb_split_count == 0)
2407 features->tx_mode = TX_MODE_LARGEST;
2408 }
2409 } else {
2410 // This is needed if real-time speed setting is changed on the fly
2411 // from one using compound prediction to one using single reference.
2412 if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2413 current_frame->reference_mode = SINGLE_REFERENCE;
2414 encode_frame_internal(cpi);
2415 }
2416 }
2417