/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_util/aom_pthread.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/partition_search.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

/*!\cond */
// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_AV1_HIGHBITDEPTH
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
#endif  // CONFIG_AV1_HIGHBITDEPTH
/*!\endcond */

// For the given bit depth, returns a constant array used to assist the
// calculation of source block variance, which will then be used to decide
// adaptive quantizers.
static const uint8_t *get_var_offs(int use_hbd, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    assert(bd == 8 || bd == 10 || bd == 12);
    const int off_index = (bd - 8) >> 1;
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                                AV1_HIGH_VAR_OFFS_10,
                                                AV1_HIGH_VAR_OFFS_12 };
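    // The high-bitdepth offset tables are uint16_t buffers; CONVERT_TO_BYTEPTR
    // wraps the pointer so it can pass through the common uint8_t-based
    // variance function interface and be unwrapped again by the high-bitdepth
    // kernels.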
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
  }
#else
  (void)use_hbd;
  (void)bd;
  assert(!use_hbd);
#endif
  assert(bd == 8);
  return AV1_VAR_OFFS;
}

void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}

void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}

unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  const int subsampling_x = xd->plane[plane].subsampling_x;
  const int subsampling_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
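  // Measure the variance of the source block against the flat mid-grey
  // reference, then normalize by the block's pixel count so the result is a
  // per-pixel value comparable across block sizes.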
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
}

unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  const int use_hbd = is_cur_buf_hbd(xd);
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

#if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each superblock
 * based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi         Top level encoder instance structure
 * \param[in,out] td          Thread data structure
 * \param[in,out] x           Macroblock level data for this block.
 * \param[in]     tile_info   Tile information / identification
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes  Number of image planes (e.g. Y, U, V)
 *
 * \remark No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                 MACROBLOCK *const x,
                                 const TileInfo *const tile_info, int mi_row,
                                 int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols = CEIL_POWER_OF_TWO(cm->mi_params.mi_cols,
                                          cm->seq_params->mib_size_log2);
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
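  // Snap the chosen qindex so that its offset from the previously coded base
  // qindex is a multiple of delta_q_res, i.e. a delta that can actually be
  // signaled in the bitstream.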
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
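    // Derive the loop-filter delta from the q delta and round it to the
    // nearest multiple of delta_lf_res. For example, with delta_lf_res == 4,
    // lfmask == ~3, so (delta_qindex / 4 + 2) & ~3 snaps delta_qindex / 4 to
    // a multiple of 4.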
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned to each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}

static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

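  // Rank the inter reference frames (indices 1 .. INTER_REFS_PER_FRAME - 1)
  // by ascending accumulated prediction error using an insertion sort, so
  // that rank_index[0] holds the most predictive reference.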
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gain of this frame is smaller than that of
        // the previous, more relevant frame by a certain amount, discard this
        // frame and all the frames after it.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}

static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
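    // For ARF frames coded with the TPL model and no other q modulation
    // active, replace the frame-level rdmult with a superblock-level value
    // derived from the propagated TPL statistics.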
    const int dr =
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
    x->rdmult = dr;
  }
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_RT_ML_PARTITIONING
// Get a prediction (stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING

#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock by a pre-determined partition pattern; only minor
 * rd-based searches are allowed to adjust the initial pattern. It is only
 * used by realtime encoding.
 */
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, TokenExtra **tp,
                                   const int mi_row, const int mi_col,
                                   const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // Set a fixed-size partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = cm->seq_params->sb_size;
    }
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
      bsize_select = cm->seq_params->sb_size;
      x->force_zeromv_skip_for_sb = 1;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
      x->force_color_check_block_level = 1;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // Set a variance-based partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If a 128x128 superblock is used, the flag must be set for all four
    // 64x64 sub-blocks.
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}

// This function initializes the stats for encode_rd_sb.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned to each block in the current superblock.
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

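  // Sweep candidate q offsets around the base delta-q in steps of the
  // signalable delta_q_res, re-encoding the superblock in dry-pass mode each
  // time and keeping the offset with the lowest rate-distortion cost.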
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // Partition search starting from a variance-based partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // Partition search by adjusting a fixed-size partition.
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it is not mistakenly used elsewhere.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check if the cost updates of mode, coeff, and dv symbols are at tile level
// or off.
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and cost update frequencies are set to off/tile,
// processing of the current SB can start even before processing of the
// top-right SB is finished. This function checks if it is sufficient to wait
// only for the top SB to finish processing before the current SB starts.
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
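  // Number of mode-info units spanning one 64x64 block: a 128x128 superblock
  // covers twice as many MIs per side, so mib_size is halved in that case.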
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border, since sad_blk_64x64 may not be set for border blocks
  // during scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate the SB source SAD by accumulating the source SAD of the four
    // 64x64 blocks in the superblock.
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}

/*!\brief Determine whether grading content can be skipped based on SAD
 * statistics
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive the average 64x64 block source SAD from the SB source SAD.
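    // A 128x128 superblock covers four 64x64 blocks, so (curr_sb_sad + 2) >> 2
    // is the rounded per-64x64-block average.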
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;

    // The threshold is determined based on the kLowSad and kHighSad
    // thresholds and test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is
      // extended to the RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}

/*!\brief Determine whether grading content is needed based on speed features
 * and frame statistics
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling
 * up the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset the deltas for the quantizer and loop filters at the beginning of
  // every tile.
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode, when the frequency of cost updates is
    // off/tile, wait for the top superblock to finish encoding. Otherwise,
    // wait for the top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
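    // In row-mt coding, each SB row inherits entropy context from the row
    // above: the first SB of a row restores the saved row context, and
    // subsequent SBs blend the running context with the saved top-right
    // context using an AVG_CDF_WEIGHT_LEFT : AVG_CDF_WEIGHT_TOP_RIGHT (3:1)
    // weighting.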
1192 if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1193 if ((tile_info->mi_col_start == mi_col)) {
1194 // restore frame context at the 1st column sb
1195 memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
1196 } else {
1197 // update context
1198 int wt_left = AVG_CDF_WEIGHT_LEFT;
1199 int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1200 if (tile_info->mi_col_end > (mi_col + mib_size))
1201 av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1202 wt_left, wt_tr);
1203 else
1204 av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1205 wt_left, wt_tr);
1206 }
1207 }
1208
1209 // Update the rate cost tables for some symbols
1210 av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1211
1212 // Reset color coding related parameters
1213 av1_zero(x->color_sensitivity_sb);
1214 av1_zero(x->color_sensitivity_sb_g);
1215 av1_zero(x->color_sensitivity_sb_alt);
1216 av1_zero(x->color_sensitivity);
1217 x->content_state_sb.source_sad_nonrd = kMedSad;
1218 x->content_state_sb.source_sad_rd = kMedSad;
1219 x->content_state_sb.lighting_change = 0;
1220 x->content_state_sb.low_sumdiff = 0;
1221 x->force_zeromv_skip_for_sb = 0;
1222 x->sb_me_block = 0;
1223 x->sb_me_partition = 0;
1224 x->sb_me_mv.as_int = 0;
1225 x->sb_force_fixed_part = 1;
1226 x->color_palette_thresh = 64;
1227 x->force_color_check_block_level = 0;
1228 x->nonrd_prune_ref_frame_search =
1229 cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1230
1231 if (cpi->oxcf.mode == ALLINTRA) {
1232 x->intra_sb_rdmult_modifier = 128;
1233 }
1234
1235 xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1236 x->source_variance = UINT_MAX;
1237 td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1238
1239 // Get segment id and skip flag
1240 const struct segmentation *const seg = &cm->seg;
1241 int seg_skip = 0;
1242 if (seg->enabled) {
1243 const uint8_t *const map =
1244 seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1245 const uint8_t segment_id =
1246 map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1247 : 0;
1248 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1249 }
1250
1251 produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1252
1253 init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1254 sb_size);
1255
1256 // Grade the temporal variation of the sb, the grade will be used to decide
1257 // fast mode search strategy for coding blocks
1258 if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1259
1260 // encode the superblock
1261 if (use_nonrd_mode) {
1262 encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1263 } else {
1264 encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1265 }
1266
1267 // Update the top-right context in row_mt coding
1268 if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1269 if (sb_cols_in_tile == 1)
1270 memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
1271 else if (sb_col_in_tile >= 1)
1272 memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
1273 sizeof(*xd->tile_ctx));
1274 }
1275 enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1276 sb_cols_in_tile);
1277 }
1278
1279 #if CONFIG_COLLECT_COMPONENT_TIMING
1280 end_timing(cpi, encode_sb_row_time);
1281 #endif
1282 }
1283
init_encode_frame_mb_context(AV1_COMP * cpi)1284 static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1285 AV1_COMMON *const cm = &cpi->common;
1286 const int num_planes = av1_num_planes(cm);
1287 MACROBLOCK *const x = &cpi->td.mb;
1288 MACROBLOCKD *const xd = &x->e_mbd;
1289
1290 // Copy data over into macro block data structures.
1291 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1292 cm->seq_params->sb_size);
1293
1294 av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1295 cm->seq_params->subsampling_y, num_planes);
1296 }
1297
av1_alloc_tile_data(AV1_COMP * cpi)1298 void av1_alloc_tile_data(AV1_COMP *cpi) {
1299 AV1_COMMON *const cm = &cpi->common;
1300 AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1301 const int tile_cols = cm->tiles.cols;
1302 const int tile_rows = cm->tiles.rows;
1303
1304 av1_row_mt_mem_dealloc(cpi);
1305
1306 aom_free(cpi->tile_data);
1307 cpi->allocated_tiles = 0;
1308 enc_row_mt->allocated_tile_cols = 0;
1309 enc_row_mt->allocated_tile_rows = 0;
1310
1311 CHECK_MEM_ERROR(
1312 cm, cpi->tile_data,
1313 aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1314
1315 cpi->allocated_tiles = tile_cols * tile_rows;
1316 enc_row_mt->allocated_tile_cols = tile_cols;
1317 enc_row_mt->allocated_tile_rows = tile_rows;
1318 for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1319 for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1320 const int tile_index = tile_row * tile_cols + tile_col;
1321 TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1322 av1_zero(this_tile->row_mt_sync);
1323 this_tile->row_ctx = NULL;
1324 }
1325 }
1326 }
1327
av1_init_tile_data(AV1_COMP * cpi)1328 void av1_init_tile_data(AV1_COMP *cpi) {
1329 AV1_COMMON *const cm = &cpi->common;
1330 const int num_planes = av1_num_planes(cm);
1331 const int tile_cols = cm->tiles.cols;
1332 const int tile_rows = cm->tiles.rows;
1333 int tile_col, tile_row;
1334 TokenInfo *const token_info = &cpi->token_info;
1335 TokenExtra *pre_tok = token_info->tile_tok[0][0];
1336 TokenList *tplist = token_info->tplist[0][0];
1337 unsigned int tile_tok = 0;
1338 int tplist_count = 0;
1339
1340 if (!is_stat_generation_stage(cpi) &&
1341 cm->features.allow_screen_content_tools) {
1342 // Number of tokens for which token info needs to be allocated.
1343 unsigned int tokens_required =
1344 get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1345 MAX_SB_SIZE_LOG2, num_planes);
1346 // Allocate/reallocate memory for token-related info if the number of tokens
1347 // required exceeds the number of tokens already allocated. This could occur
1348 // in the following cases:
1349 // 1) If the memory is not yet allocated
1350 // 2) If the frame dimensions have changed
1351 const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1352 if (realloc_tokens) {
1353 free_token_info(token_info);
1354 alloc_token_info(cm, token_info, tokens_required);
1355 pre_tok = token_info->tile_tok[0][0];
1356 tplist = token_info->tplist[0][0];
1357 }
1358 }
1359
1360 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1361 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1362 TileDataEnc *const tile_data =
1363 &cpi->tile_data[tile_row * tile_cols + tile_col];
1364 TileInfo *const tile_info = &tile_data->tile_info;
1365 av1_tile_init(tile_info, cm, tile_row, tile_col);
1366 tile_data->firstpass_top_mv = kZeroMv;
1367 tile_data->abs_sum_level = 0;
1368
1369 if (is_token_info_allocated(token_info)) {
1370 token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1371 pre_tok = token_info->tile_tok[tile_row][tile_col];
1372 tile_tok = allocated_tokens(
1373 tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1374 num_planes);
1375 token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1376 tplist = token_info->tplist[tile_row][tile_col];
1377 tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1378 }
1379 tile_data->allow_update_cdf = !cm->tiles.large_scale;
1380 tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1381 !cm->features.disable_cdf_update &&
1382 !delay_wait_for_top_right_sb(cpi);
1383 tile_data->tctx = *cm->fc;
1384 }
1385 }
1386 }
1387
1388 // Populate the start palette token info prior to encoding an SB row.
1389 static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1390 int tile_row, int tile_col, int mi_row,
1391 TokenExtra **tp) {
1392 const TokenInfo *token_info = &cpi->token_info;
1393 if (!is_token_info_allocated(token_info)) return;
1394
1395 const AV1_COMMON *cm = &cpi->common;
1396 const int num_planes = av1_num_planes(cm);
1397 TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1398 const int sb_row_in_tile =
1399 (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1400
1401 get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1402 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1403 assert(tplist != NULL);
1404 tplist[sb_row_in_tile].start = *tp;
1405 }
1406
1407 // Populate the token count after encoding an SB row.
1408 static inline void populate_token_count(AV1_COMP *cpi,
1409 const TileInfo *tile_info, int tile_row,
1410 int tile_col, int mi_row,
1411 TokenExtra *tok) {
1412 const TokenInfo *token_info = &cpi->token_info;
1413 if (!is_token_info_allocated(token_info)) return;
1414
1415 const AV1_COMMON *cm = &cpi->common;
1416 const int num_planes = av1_num_planes(cm);
1417 TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1418 const int sb_row_in_tile =
1419 (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
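// Note: mi units are 4x4 luma samples and macroblocks are 16x16; the two
// conversions below round to the nearest whole macroblock ((x + 2) >> 2 for
// mi columns, (sb_size_in_pixels + 8) >> 4 for rows). E.g., for a 128x128
// superblock, ((1 << 7) + 8) >> 4 = 8 macroblock rows.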
1420 const int tile_mb_cols =
1421 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1422 const int num_mb_rows_in_sb =
1423 ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1424 tplist[sb_row_in_tile].count =
1425 (unsigned int)(tok - tplist[sb_row_in_tile].start);
1426
1427 assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1428 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1429 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1430 num_planes));
1431
1432 (void)num_planes;
1433 (void)tile_mb_cols;
1434 (void)num_mb_rows_in_sb;
1435 }
1436
1437 /*!\brief Encode a superblock row
1438 *
1439 * \ingroup partition_search
1440 */
1441 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1442 int tile_col, int mi_row) {
1443 AV1_COMMON *const cm = &cpi->common;
1444 const int tile_cols = cm->tiles.cols;
1445 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1446 const TileInfo *const tile_info = &this_tile->tile_info;
1447 TokenExtra *tok = NULL;
1448
1449 get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1450
1451 encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1452
1453 populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1454 }
1455
1456 /*!\brief Encode a tile
1457 *
1458 * \ingroup partition_search
1459 */
1460 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1461 int tile_col) {
1462 AV1_COMMON *const cm = &cpi->common;
1463 TileDataEnc *const this_tile =
1464 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1465 const TileInfo *const tile_info = &this_tile->tile_info;
1466
1467 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1468
1469 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1470 tile_info->mi_col_end, tile_row);
1471 av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1472 &td->mb.e_mbd);
1473
1474 #if !CONFIG_REALTIME_ONLY
1475 if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1476 cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1477 #endif
1478
1479 if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1480 av1_crc32c_calculator_init(
1481 &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1482 }
1483
1484 for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1485 mi_row += cm->seq_params->mib_size) {
1486 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1487 }
1488 this_tile->abs_sum_level = td->abs_sum_level;
1489 }
1490
1491 /*!\brief Break one frame into tiles and encode the tiles
1492 *
1493 * \ingroup partition_search
1494 *
1495 * \param[in] cpi Top-level encoder structure
1496 */
1497 static inline void encode_tiles(AV1_COMP *cpi) {
1498 AV1_COMMON *const cm = &cpi->common;
1499 const int tile_cols = cm->tiles.cols;
1500 const int tile_rows = cm->tiles.rows;
1501 int tile_col, tile_row;
1502
1503 MACROBLOCK *const mb = &cpi->td.mb;
1504 assert(IMPLIES(cpi->tile_data == NULL,
1505 cpi->allocated_tiles < tile_cols * tile_rows));
1506 if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1507
1508 av1_init_tile_data(cpi);
1509 av1_alloc_mb_data(cpi, mb);
1510
1511 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1512 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1513 TileDataEnc *const this_tile =
1514 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1515 cpi->td.intrabc_used = 0;
1516 cpi->td.deltaq_used = 0;
1517 cpi->td.abs_sum_level = 0;
1518 cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1519 cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1520 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1521 cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1522 av1_init_rtc_counters(&cpi->td.mb);
1523 cpi->td.mb.palette_pixels = 0;
1524 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1525 if (!frame_is_intra_only(&cpi->common))
1526 av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1527 cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1528 cpi->intrabc_used |= cpi->td.intrabc_used;
1529 cpi->deltaq_used |= cpi->td.deltaq_used;
1530 }
1531 }
1532
1533 av1_dealloc_mb_data(mb, av1_num_planes(cm));
1534 }
1535
1536 // Set the relative distance of each reference frame w.r.t. the current frame
1537 static inline void set_rel_frame_dist(
1538 const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1539 const int ref_frame_flags) {
1540 MV_REFERENCE_FRAME ref_frame;
1541 int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1542 ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1543 ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1544 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1545 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1546 if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1547 int dist = av1_encoder_get_relative_dist(
1548 cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1549 cm->current_frame.display_order_hint);
1550 ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1551 // Get the nearest ref_frame in the past
1552 if (abs(dist) < min_past_dist && dist < 0) {
1553 ref_frame_dist_info->nearest_past_ref = ref_frame;
1554 min_past_dist = abs(dist);
1555 }
1556 // Get the nearest ref_frame in the future
1557 if (dist < min_future_dist && dist > 0) {
1558 ref_frame_dist_info->nearest_future_ref = ref_frame;
1559 min_future_dist = dist;
1560 }
1561 }
1562 }
1563 }
1564
1565 static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1566 assert(!frame_is_intra_only(cm));
1567
1568 int one_sided_refs = 1;
1569 const int cur_display_order_hint = cm->current_frame.display_order_hint;
1570 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1571 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1572 if (buf == NULL) continue;
1573 if (av1_encoder_get_relative_dist(buf->display_order_hint,
1574 cur_display_order_hint) > 0) {
1575 one_sided_refs = 0; // bwd reference
1576 break;
1577 }
1578 }
1579 return one_sided_refs;
1580 }
1581
1582 static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1583 int ref_order_hint[2]) {
1584 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1585 ref_order_hint[0] = ref_order_hint[1] = 0;
1586 if (!skip_mode_info->skip_mode_allowed) return;
1587
1588 const RefCntBuffer *const buf_0 =
1589 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1590 const RefCntBuffer *const buf_1 =
1591 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1592 assert(buf_0 != NULL && buf_1 != NULL);
1593
1594 ref_order_hint[0] = buf_0->order_hint;
1595 ref_order_hint[1] = buf_1->order_hint;
1596 }
1597
1598 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1599 AV1_COMMON *const cm = &cpi->common;
1600
1601 av1_setup_skip_mode_allowed(cm);
1602 if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1603
1604 // Turn off skip mode if the temporal distances of the reference pair to the
1605 // current frame differ by more than 1 frame.
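// For example, reference distances (2, 1) from the current frame keep skip
// mode enabled (|2 - 1| <= 1), while distances (3, 1) disable it (|3 - 1| > 1).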
1606 const int cur_offset = (int)cm->current_frame.order_hint;
1607 int ref_offset[2];
1608 get_skip_mode_ref_offsets(cm, ref_offset);
1609 const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1610 cur_offset, ref_offset[0]);
1611 const int cur_to_ref1 = abs(get_relative_dist(
1612 &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1613 if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
1614
1615 // High Latency: Turn off skip mode if all refs are fwd.
1616 if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1617
1618 const int ref_frame[2] = {
1619 cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1620 cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1621 };
1622 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1623 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1624 return 0;
1625
1626 return 1;
1627 }
1628
1629 static inline void set_default_interp_skip_flags(
1630 const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1631 const int num_planes = av1_num_planes(cm);
1632 interp_search_flags->default_interp_skip_flags =
1633 (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1634 : INTERP_SKIP_LUMA_SKIP_CHROMA;
1635 }
1636
1637 static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1638 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1639 cpi->sf.inter_sf.disable_onesided_comp) &&
1640 cpi->all_one_sided_refs) {
1641 // Disable all compound references
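// The mask sets every bit in [REF_FRAMES, MODE_CTX_REF_FRAMES), i.e. the
// index range that enumerates compound (two-reference) pairs.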
1642 cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
1643 } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1644 cpi->sf.inter_sf.selective_ref_frame >= 2) {
1645 AV1_COMMON *const cm = &cpi->common;
1646 const int cur_frame_display_order_hint =
1647 cm->current_frame.display_order_hint;
1648 unsigned int *ref_display_order_hint =
1649 cm->cur_frame->ref_display_order_hint;
1650 const int arf2_dist = av1_encoder_get_relative_dist(
1651 ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1652 cur_frame_display_order_hint);
1653 const int bwd_dist = av1_encoder_get_relative_dist(
1654 ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1655 cur_frame_display_order_hint);
1656
1657 for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1658 MV_REFERENCE_FRAME rf[2];
1659 av1_set_ref_frame(rf, ref_idx);
1660 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1661 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1662 continue;
1663 }
1664
1665 if (!cpi->all_one_sided_refs) {
1666 int ref_dist[2];
1667 for (int i = 0; i < 2; ++i) {
1668 ref_dist[i] = av1_encoder_get_relative_dist(
1669 ref_display_order_hint[rf[i] - LAST_FRAME],
1670 cur_frame_display_order_hint);
1671 }
1672
1673 // One-sided compound is used only when all reference frames are
1674 // one-sided.
1675 if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1676 cpi->prune_ref_frame_mask |= 1 << ref_idx;
1677 }
1678 }
1679
1680 if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1681 (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1682 (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1683 // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1684 if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1685 // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1686 // reference to the current frame than ALTREF2_FRAME
1687 cpi->prune_ref_frame_mask |= 1 << ref_idx;
1688 }
1689 }
1690 }
1691 }
1692 }
1693
1694 static int allow_deltaq_mode(AV1_COMP *cpi) {
1695 #if !CONFIG_REALTIME_ONLY
1696 AV1_COMMON *const cm = &cpi->common;
1697 BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1698 int sbs_wide = mi_size_wide[sb_size];
1699 int sbs_high = mi_size_high[sb_size];
1700
1701 int64_t delta_rdcost = 0;
1702 for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1703 for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1704 int64_t this_delta_rdcost = 0;
1705 av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1706 mi_row, mi_col);
1707 delta_rdcost += this_delta_rdcost;
1708 }
1709 }
1710 return delta_rdcost < 0;
1711 #else
1712 (void)cpi;
1713 return 1;
1714 #endif // !CONFIG_REALTIME_ONLY
1715 }
1716
1717 #define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1718 #define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1719
1720 // Populates block-level thresholds for the force zeromv-skip decision.
1721 static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1722 if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1723
1724 // The thresholds for forcing the zeromv-skip decision are as follows:
1725 // For 128x128 blocks, the threshold is 10000 and the per-pixel threshold is
1726 // 0.6103. For 64x64 blocks, the threshold is 5000 and the per-pixel threshold
1727 // is 1.221, allowing slightly higher error for smaller blocks.
1728 //  Per Pixel Threshold of 64x64 block        Area of 64x64 block        1  1
1729 // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1730 // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1731 // Thus, the per-pixel thresholds for blocks of size 32x32, 16x16, ... can be
1732 // chosen as 2.442, 4.884, .... As the per-pixel error tends to be higher for
1733 // small blocks, the per-pixel threshold is capped at 4.
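// For example, for a 64x64 block (4096 pixels): 10000 * sqrt(4096 / 16384) =
// 10000 * 0.5 = 5000, which is below the cap of 4 * 4096 = 16384, so the
// threshold used is 5000, matching the value quoted above.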
1734 const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1735 const int num_128x128_pix =
1736 block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1737
1738 for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1739 const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1740
1741 // Calculate the threshold for zeromv-skip decision based on area of the
1742 // partition
1743 unsigned int thresh_exit_part_blk =
1744 (unsigned int)(thresh_exit_128x128_part *
1745 sqrt((double)num_block_pix / num_128x128_pix) +
1746 0.5);
1747 thresh_exit_part_blk = AOMMIN(
1748 thresh_exit_part_blk,
1749 (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1750 cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1751 }
1752 }
1753
1754 static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
1755 int8_t *is_block_same[2][3]) {
1756 for (int k = 0; k < 2; ++k) {
1757 for (int j = 0; j < 2; ++j) {
1758 aom_free(block_hash_values[k][j]);
1759 }
1760
1761 for (int j = 0; j < 3; ++j) {
1762 aom_free(is_block_same[k][j]);
1763 }
1764 }
1765 }
1766
1767 /*!\brief Determines delta_q_res value for Variance Boost modulation.
1768 */
1769 static int aom_get_variance_boost_delta_q_res(int qindex) {
1770 // Signaling delta_q changes across superblocks comes with inherent syntax
1771 // element overhead, which adds to the total payload size. This overhead
1772 // becomes proportionally bigger the higher the base qindex (i.e. lower
1773 // quality, smaller file size), so a balance needs to be struck.
1774 // - Smaller delta_q_res: more granular delta_q control, more bits spent
1775 // signaling deltas.
1776 // - Larger delta_q_res: coarser delta_q control, fewer bits spent signaling
1777 // deltas.
1778 //
1779 // At the same time, SB qindex fluctuations become larger the higher
1780 // the base qindex (between lowest and highest-variance regions):
1781 // - For QP 5: up to 8 qindexes
1782 // - For QP 60: up to 52 qindexes
1783 //
1784 // With these factors in mind, it was found that the strategy that maximizes
1785 // quality per bitrate is to use very finely grained delta_q values for the
1786 // lowest picture qindexes (to preserve tiny SB qindex deltas),
1787 // and progressively making them coarser as base qindex increases (to reduce
1788 // total signaling overhead).
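// The resulting mapping from base qindex to delta_q_res is:
//   [0, 80) -> 1, [80, 120) -> 2, [120, 160) -> 4, [160, 255] -> 8.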
1789 int delta_q_res = 1;
1790
1791 if (qindex >= 160) {
1792 delta_q_res = 8;
1793 } else if (qindex >= 120) {
1794 delta_q_res = 4;
1795 } else if (qindex >= 80) {
1796 delta_q_res = 2;
1797 } else {
1798 delta_q_res = 1;
1799 }
1800
1801 return delta_q_res;
1802 }
1803
1804 /*!\brief Encoder setup (only for the current frame), encoding, and
1805 * reconstruction for a single frame
1806 *
1807 * \ingroup high_level_algo
1808 */
1809 static inline void encode_frame_internal(AV1_COMP *cpi) {
1810 ThreadData *const td = &cpi->td;
1811 MACROBLOCK *const x = &td->mb;
1812 AV1_COMMON *const cm = &cpi->common;
1813 CommonModeInfoParams *const mi_params = &cm->mi_params;
1814 FeatureFlags *const features = &cm->features;
1815 MACROBLOCKD *const xd = &x->e_mbd;
1816 RD_COUNTS *const rdc = &cpi->td.rd_counts;
1817 #if CONFIG_FPMT_TEST
1818 FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1819 FrameProbInfo *const temp_frame_probs_simulation =
1820 &cpi->ppi->temp_frame_probs_simulation;
1821 #endif
1822 FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1823 IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1824 MultiThreadInfo *const mt_info = &cpi->mt_info;
1825 AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1826 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1827 const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1828 int i;
1829
1830 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1831 mi_params->setup_mi(mi_params);
1832 }
1833
1834 set_mi_offsets(mi_params, xd, 0, 0);
1835
1836 av1_zero(*td->counts);
1837 av1_zero(rdc->tx_type_used);
1838 av1_zero(rdc->obmc_used);
1839 av1_zero(rdc->warped_used);
1840 av1_zero(rdc->seg_tmp_pred_cost);
1841
1842 // Reset the intrabc usage flag.
1843 cpi->intrabc_used = 0;
1844 // Need to disable intrabc when superres is selected
1845 if (av1_superres_scaled(cm)) {
1846 features->allow_intrabc = 0;
1847 }
1848
1849 features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1850
1851 if (features->allow_warped_motion &&
1852 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1853 const FRAME_UPDATE_TYPE update_type =
1854 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1855 int warped_probability =
1856 #if CONFIG_FPMT_TEST
1857 cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1858 ? temp_frame_probs->warped_probs[update_type]
1859 :
1860 #endif // CONFIG_FPMT_TEST
1861 frame_probs->warped_probs[update_type];
1862 if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1863 features->allow_warped_motion = 0;
1864 }
1865
1866 int hash_table_created = 0;
1867 if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1868 !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1869 // TODO(any): move this outside of the recoding loop to avoid recalculating
1870 // the hash table.
1871 // add to hash table
1872 const int pic_width = cpi->source->y_crop_width;
1873 const int pic_height = cpi->source->y_crop_height;
1874 uint32_t *block_hash_values[2][2] = { { NULL } };
1875 int8_t *is_block_same[2][3] = { { NULL } };
1876 int k, j;
1877 bool error = false;
1878
1879 for (k = 0; k < 2 && !error; ++k) {
1880 for (j = 0; j < 2; ++j) {
1881 block_hash_values[k][j] = (uint32_t *)aom_malloc(
1882 sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1883 if (!block_hash_values[k][j]) {
1884 error = true;
1885 break;
1886 }
1887 }
1888
1889 for (j = 0; j < 3 && !error; ++j) {
1890 is_block_same[k][j] = (int8_t *)aom_malloc(
1891 sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1892 if (!is_block_same[k][j]) error = true;
1893 }
1894 }
1895
1896 av1_hash_table_init(intrabc_hash_info);
1897 if (error ||
1898 !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1899 free_block_hash_buffers(block_hash_values, is_block_same);
1900 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1901 "Error allocating intrabc_hash_table and buffers");
1902 }
1903 hash_table_created = 1;
1904 av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1905 block_hash_values[0], is_block_same[0]);
1906 // Hash data generated for screen content is used for intraBC ME
1907 const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1908 const int max_sb_size =
1909 (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1910 int src_idx = 0;
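// Ping-pong between the two hash-value buffers: hashes for blocks of size 2N
// are built from the size-N hashes, doubling from 4x4 up to the superblock
// size; only sizes at or above min_alloc_size are added to the hash map.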
1911 for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1912 const int dst_idx = !src_idx;
1913 av1_generate_block_hash_value(
1914 intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1915 block_hash_values[dst_idx], is_block_same[src_idx],
1916 is_block_same[dst_idx]);
1917 if (size >= min_alloc_size) {
1918 if (!av1_add_to_hash_map_by_row_with_precal_data(
1919 &intrabc_hash_info->intrabc_hash_table,
1920 block_hash_values[dst_idx], is_block_same[dst_idx][2],
1921 pic_width, pic_height, size)) {
1922 error = true;
1923 break;
1924 }
1925 }
1926 }
1927
1928 free_block_hash_buffers(block_hash_values, is_block_same);
1929
1930 if (error) {
1931 aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1932 "Error adding data to intrabc_hash_table");
1933 }
1934 }
1935
1936 const CommonQuantParams *quant_params = &cm->quant_params;
1937 for (i = 0; i < MAX_SEGMENTS; ++i) {
1938 const int qindex =
1939 cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1940 : quant_params->base_qindex;
1941 xd->lossless[i] =
1942 qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1943 quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1944 quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1945 if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1946 xd->qindex[i] = qindex;
1947 if (xd->lossless[i]) {
1948 cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1949 } else {
1950 cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1951 }
1952 }
1953 features->coded_lossless = is_coded_lossless(cm, xd);
1954 features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1955
1956 // Fix delta q resolution for the moment
1957
1958 cm->delta_q_info.delta_q_res = 0;
1959 if (cpi->use_ducky_encode) {
1960 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1961 } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1962 if (deltaq_mode == DELTA_Q_OBJECTIVE)
1963 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1964 else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1965 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1966 else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1967 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1968 else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1969 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1970 else if (deltaq_mode == DELTA_Q_HDR)
1971 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1972 else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
1973 cm->delta_q_info.delta_q_res =
1974 aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
1975 // Set delta_q_present_flag before it is used for the first time
1976 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1977 cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1978
1979 // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1980 // is used for ineligible frames. That effectively will turn off row_mt
1981 // usage. Note objective delta_q and tpl eligible frames are only altref
1982 // frames currently.
1983 const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1984 if (cm->delta_q_info.delta_q_present_flag) {
1985 if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1986 gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1987 cm->delta_q_info.delta_q_present_flag = 0;
1988
1989 if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1990 cm->delta_q_info.delta_q_present_flag) {
1991 cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1992 }
1993 }
1994
1995 // Reset delta_q_used flag
1996 cpi->deltaq_used = 0;
1997
1998 cm->delta_q_info.delta_lf_present_flag =
1999 cm->delta_q_info.delta_q_present_flag &&
2000 oxcf->tool_cfg.enable_deltalf_mode;
2001 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2002
2003 // update delta_q_present_flag and delta_lf_present_flag based on
2004 // base_qindex
2005 cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2006 cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2007 } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2008 cpi->svc.number_temporal_layers == 1) {
2009 cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2010 cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2011 }
2012 cpi->rc.cnt_zeromv = 0;
2013
2014 av1_frame_init_quantizer(cpi);
2015 init_encode_frame_mb_context(cpi);
2016 set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2017
2018 if (cm->prev_frame && cm->prev_frame->seg.enabled)
2019 cm->last_frame_seg_map = cm->prev_frame->seg_map;
2020 else
2021 cm->last_frame_seg_map = NULL;
2022 if (features->allow_intrabc || features->coded_lossless) {
2023 av1_set_default_ref_deltas(cm->lf.ref_deltas);
2024 av1_set_default_mode_deltas(cm->lf.mode_deltas);
2025 } else if (cm->prev_frame) {
2026 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2027 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2028 }
2029 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2030 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2031
2032 cpi->all_one_sided_refs =
2033 frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2034
2035 cpi->prune_ref_frame_mask = 0;
2036 // Figure out which ref frames can be skipped at frame level.
2037 setup_prune_ref_frame_mask(cpi);
2038
2039 x->txfm_search_info.txb_split_count = 0;
2040 #if CONFIG_SPEED_STATS
2041 x->txfm_search_info.tx_search_count = 0;
2042 #endif // CONFIG_SPEED_STATS
2043
2044 #if !CONFIG_REALTIME_ONLY
2045 #if CONFIG_COLLECT_COMPONENT_TIMING
2046 start_timing(cpi, av1_compute_global_motion_time);
2047 #endif
2048 av1_compute_global_motion_facade(cpi);
2049 #if CONFIG_COLLECT_COMPONENT_TIMING
2050 end_timing(cpi, av1_compute_global_motion_time);
2051 #endif
2052 #endif // !CONFIG_REALTIME_ONLY
2053
2054 #if CONFIG_COLLECT_COMPONENT_TIMING
2055 start_timing(cpi, av1_setup_motion_field_time);
2056 #endif
2057 av1_calculate_ref_frame_side(cm);
2058 if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2059 #if CONFIG_COLLECT_COMPONENT_TIMING
2060 end_timing(cpi, av1_setup_motion_field_time);
2061 #endif
2062
2063 cm->current_frame.skip_mode_info.skip_mode_flag =
2064 check_skip_mode_enabled(cpi);
2065
2066 // Initialization of skip mode cost depends on the value of
2067 // 'skip_mode_flag'. This initialization happens in the function
2068 // av1_fill_mode_rates(), which is in turn called in
2069 // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2070 // has to be called after 'skip_mode_flag' is initialized.
2071 av1_initialize_rd_consts(cpi);
2072 av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2073 populate_thresh_to_force_zeromv_skip(cpi);
2074
2075 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2076 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2077 mt_info->row_mt_enabled = 0;
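// Multi-threaded bitstream packing is enabled only when both the number of
// MOD_PACK_BS workers and the tile count exceed one.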
2078 mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2079 cm->tiles.cols * cm->tiles.rows) > 1;
2080
2081 if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2082 mt_info->row_mt_enabled = 1;
2083 enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2084 enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2085 av1_encode_tiles_row_mt(cpi);
2086 } else {
2087 if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2088 av1_encode_tiles_mt(cpi);
2089 } else {
2090 // Preallocate the pc_tree for realtime coding to reduce the cost of
2091 // memory allocation.
2092 const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2093 if (use_nonrd_mode) {
2094 td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2095 if (!td->pc_root)
2096 aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2097 "Failed to allocate PC_TREE");
2098 } else {
2099 td->pc_root = NULL;
2100 }
2101
2102 encode_tiles(cpi);
2103 av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2104 cpi->sf.part_sf.partition_search_type);
2105 td->pc_root = NULL;
2106 }
2107 }
2108
2109 // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2110 if (features->allow_intrabc && !cpi->intrabc_used) {
2111 features->allow_intrabc = 0;
2112 }
2113 if (features->allow_intrabc) {
2114 cm->delta_q_info.delta_lf_present_flag = 0;
2115 }
2116
2117 if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2118 cm->delta_q_info.delta_q_present_flag = 0;
2119 }
2120
2121 // Set the transform size appropriately before bitstream creation
2122 const MODE_EVAL_TYPE eval_type =
2123 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2124 ? WINNER_MODE_EVAL
2125 : DEFAULT_EVAL;
2126 const TX_SIZE_SEARCH_METHOD tx_search_type =
2127 cpi->winner_mode_params.tx_size_search_methods[eval_type];
2128 assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2129 features->tx_mode = select_tx_mode(cm, tx_search_type);
2130
2131 // Retain the frame level probability update conditions for parallel frames.
2132 // These conditions will be consumed during postencode stage to update the
2133 // probability.
2134 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2135 cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2136 cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2137 cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2138 (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2139 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2140 cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2141 (features->allow_warped_motion &&
2142 cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2143 cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2144 (cm->current_frame.frame_type != KEY_FRAME &&
2145 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2146 features->interp_filter == SWITCHABLE);
2147 }
2148
2149 if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2150 ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2151 INT_MAX) &&
2152 (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2153 const FRAME_UPDATE_TYPE update_type =
2154 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2155 for (i = 0; i < TX_SIZES_ALL; i++) {
2156 int sum = 0;
2157 int j;
2158 int left = MAX_TX_TYPE_PROB;
2159
2160 for (j = 0; j < TX_TYPES; j++)
2161 sum += cpi->td.rd_counts.tx_type_used[i][j];
2162
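// Blend the per-frame estimate into the running probabilities with a simple
// average: prob = (old + new) >> 1. 'left' tracks the unassigned probability
// mass; whatever remains after the other tx types is folded into index 0 so
// the row still sums to MAX_TX_TYPE_PROB.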
2163 for (j = TX_TYPES - 1; j >= 0; j--) {
2164 int update_txtype_frameprobs = 1;
2165 const int new_prob =
2166 sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2167 cpi->td.rd_counts.tx_type_used[i][j] / sum)
2168 : (j ? 0 : MAX_TX_TYPE_PROB);
2169 #if CONFIG_FPMT_TEST
2170 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2171 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2172 0) {
2173 int prob =
2174 (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2175 new_prob) >>
2176 1;
2177 left -= prob;
2178 if (j == 0) prob += left;
2179 temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2180 prob;
2181 // Copy temp_frame_probs_simulation to temp_frame_probs
2182 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2183 update_type_idx++) {
2184 temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2185 temp_frame_probs_simulation
2186 ->tx_type_probs[update_type_idx][i][j];
2187 }
2188 }
2189 update_txtype_frameprobs = 0;
2190 }
2191 #endif // CONFIG_FPMT_TEST
2192 // Track the frame probabilities of parallel encode frames to update
2193 // during postencode stage.
2194 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2195 update_txtype_frameprobs = 0;
2196 cpi->frame_new_probs[cpi->num_frame_recode]
2197 .tx_type_probs[update_type][i][j] = new_prob;
2198 }
2199 if (update_txtype_frameprobs) {
2200 int prob =
2201 (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2202 left -= prob;
2203 if (j == 0) prob += left;
2204 frame_probs->tx_type_probs[update_type][i][j] = prob;
2205 }
2206 }
2207 }
2208 }
2209
2210 if (cm->seg.enabled) {
2211 cm->seg.temporal_update = 1;
2212 if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2213 cm->seg.temporal_update = 0;
2214 }
2215
2216 if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2217 cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2218 const FRAME_UPDATE_TYPE update_type =
2219 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2220
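// OBMC probabilities are kept at 7-bit precision (a scale of 128) and updated
// by averaging the previous value with this frame's observed usage ratio.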
2221 for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2222 int sum = 0;
2223 int update_obmc_frameprobs = 1;
2224 for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2225
2226 const int new_prob =
2227 sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2228 #if CONFIG_FPMT_TEST
2229 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2230 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2231 temp_frame_probs_simulation->obmc_probs[update_type][i] =
2232 (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2233 new_prob) >>
2234 1;
2235 // Copy temp_frame_probs_simulation to temp_frame_probs
2236 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2237 update_type_idx++) {
2238 temp_frame_probs->obmc_probs[update_type_idx][i] =
2239 temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2240 }
2241 }
2242 update_obmc_frameprobs = 0;
2243 }
2244 #endif // CONFIG_FPMT_TEST
2245 // Track the frame probabilities of parallel encode frames to update
2246 // during postencode stage.
2247 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2248 update_obmc_frameprobs = 0;
2249 cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2250 new_prob;
2251 }
2252 if (update_obmc_frameprobs) {
2253 frame_probs->obmc_probs[update_type][i] =
2254 (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2255 }
2256 }
2257 }
2258
2259 if (features->allow_warped_motion &&
2260 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2261 const FRAME_UPDATE_TYPE update_type =
2262 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2263 int update_warp_frameprobs = 1;
2264 int sum = 0;
2265 for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2266 const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2267 #if CONFIG_FPMT_TEST
2268 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2269 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2270 temp_frame_probs_simulation->warped_probs[update_type] =
2271 (temp_frame_probs_simulation->warped_probs[update_type] +
2272 new_prob) >>
2273 1;
2274 // Copy temp_frame_probs_simulation to temp_frame_probs
2275 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2276 update_type_idx++) {
2277 temp_frame_probs->warped_probs[update_type_idx] =
2278 temp_frame_probs_simulation->warped_probs[update_type_idx];
2279 }
2280 }
2281 update_warp_frameprobs = 0;
2282 }
2283 #endif // CONFIG_FPMT_TEST
2284 // Track the frame probabilities of parallel encode frames to update
2285 // during postencode stage.
2286 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2287 update_warp_frameprobs = 0;
2288 cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2289 new_prob;
2290 }
2291 if (update_warp_frameprobs) {
2292 frame_probs->warped_probs[update_type] =
2293 (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2294 }
2295 }
2296
2297 if (cm->current_frame.frame_type != KEY_FRAME &&
2298 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2299 features->interp_filter == SWITCHABLE) {
2300 const FRAME_UPDATE_TYPE update_type =
2301 get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2302
2303 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2304 int sum = 0;
2305 int j;
2306 int left = 1536;
2307
2308 for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2309 sum += cpi->td.counts->switchable_interp[i][j];
2310 }
2311
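// Same normalization scheme as the tx-type update above, at a fixed scale of
// 1536: the residual mass in 'left' is folded into index 0 so each context's
// filter probabilities sum to 1536.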
2312 for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2313 int update_interpfilter_frameprobs = 1;
2314 const int new_prob =
2315 sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2316 : (j ? 0 : 1536);
2317 #if CONFIG_FPMT_TEST
2318 if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2319 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2320 0) {
2321 int prob = (temp_frame_probs_simulation
2322 ->switchable_interp_probs[update_type][i][j] +
2323 new_prob) >>
2324 1;
2325 left -= prob;
2326 if (j == 0) prob += left;
2327 temp_frame_probs_simulation
2328 ->switchable_interp_probs[update_type][i][j] = prob;
2329 // Copy temp_frame_probs_simulation to temp_frame_probs
2330 for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2331 update_type_idx++) {
2332 temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2333 temp_frame_probs_simulation
2334 ->switchable_interp_probs[update_type_idx][i][j];
2335 }
2336 }
2337 update_interpfilter_frameprobs = 0;
2338 }
2339 #endif // CONFIG_FPMT_TEST
2340 // Track the frame probabilities of parallel encode frames to update
2341 // during postencode stage.
2342 if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2343 update_interpfilter_frameprobs = 0;
2344 cpi->frame_new_probs[cpi->num_frame_recode]
2345 .switchable_interp_probs[update_type][i][j] = new_prob;
2346 }
2347 if (update_interpfilter_frameprobs) {
2348 int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2349 new_prob) >>
2350 1;
2351 left -= prob;
2352 if (j == 0) prob += left;
2353 frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2354 }
2355 }
2356 }
2357 }
2358 if (hash_table_created) {
2359 av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2360 }
2361 }
2362
2363 /*!\brief Setup reference frame buffers and encode a frame
2364 *
2365 * \ingroup high_level_algo
2366 * \callgraph
2367 * \callergraph
2368 *
2369 * \param[in] cpi Top-level encoder structure
2370 */
2371 void av1_encode_frame(AV1_COMP *cpi) {
2372 AV1_COMMON *const cm = &cpi->common;
2373 CurrentFrame *const current_frame = &cm->current_frame;
2374 FeatureFlags *const features = &cm->features;
2375 RD_COUNTS *const rdc = &cpi->td.rd_counts;
2376 const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2377 // Indicates whether or not to use a default reduced set for ext-tx
2378 // rather than the potential full set of 16 transforms
2379 features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2380
2381 // Make sure segment_id is no larger than last_active_segid.
2382 if (cm->seg.enabled && cm->seg.update_map) {
2383 const int mi_rows = cm->mi_params.mi_rows;
2384 const int mi_cols = cm->mi_params.mi_cols;
2385 const int last_active_segid = cm->seg.last_active_segid;
2386 uint8_t *map = cpi->enc_seg.map;
2387 for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2388 for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2389 map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2390 }
2391 map += mi_cols;
2392 }
2393 }
2394
2395 av1_setup_frame_buf_refs(cm);
2396 enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2397 cm->cur_frame->ref_display_order_hint,
2398 cm->current_frame.display_order_hint);
2399 set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2400 cpi->ref_frame_flags);
2401 av1_setup_frame_sign_bias(cm);
2402
2403 // If global motion is enabled, then every buffer which is used as either
2404 // a source or a ref frame should have an image pyramid allocated.
2405 // Check here so that issues can be caught early in debug mode
2406 #if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2407 if (cpi->alloc_pyramid) {
2408 assert(cpi->source->y_pyramid);
2409 for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2410 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2411 if (buf != NULL) {
2412 assert(buf->buf.y_pyramid);
2413 }
2414 }
2415 }
2416 #endif // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2417
2418 #if CONFIG_MISMATCH_DEBUG
2419 mismatch_reset_frame(av1_num_planes(cm));
2420 #endif
2421
2422 rdc->newmv_or_intra_blocks = 0;
2423 cpi->palette_pixel_num = 0;
2424
2425 if (cpi->sf.hl_sf.frame_parameter_update ||
2426 cpi->sf.rt_sf.use_comp_ref_nonrd) {
2427 if (frame_is_intra_only(cm))
2428 current_frame->reference_mode = SINGLE_REFERENCE;
2429 else
2430 current_frame->reference_mode = REFERENCE_MODE_SELECT;
2431
2432 features->interp_filter = SWITCHABLE;
2433 if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2434
2435 features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2436 features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2437
2438 rdc->compound_ref_used_flag = 0;
2439 rdc->skip_mode_used_flag = 0;
2440
2441 encode_frame_internal(cpi);
2442
2443 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2444 // Use a flag that includes 4x4 blocks
2445 if (rdc->compound_ref_used_flag == 0) {
2446 current_frame->reference_mode = SINGLE_REFERENCE;
2447 #if CONFIG_ENTROPY_STATS
2448 av1_zero(cpi->td.counts->comp_inter);
2449 #endif // CONFIG_ENTROPY_STATS
2450 }
2451 }
2452 // Re-check the skip mode status as the reference mode may have been
2453 // changed.
2454 SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2455 if (frame_is_intra_only(cm) ||
2456 current_frame->reference_mode == SINGLE_REFERENCE) {
2457 skip_mode_info->skip_mode_allowed = 0;
2458 skip_mode_info->skip_mode_flag = 0;
2459 }
2460 if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2461 skip_mode_info->skip_mode_flag = 0;
2462
2463 if (!cm->tiles.large_scale) {
2464 if (features->tx_mode == TX_MODE_SELECT &&
2465 cpi->td.mb.txfm_search_info.txb_split_count == 0)
2466 features->tx_mode = TX_MODE_LARGEST;
2467 }
2468 } else {
2469 // This is needed if the real-time speed setting is changed on the fly
2470 // from one using compound prediction to one using single reference.
2471 if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2472 current_frame->reference_mode = SINGLE_REFERENCE;
2473 encode_frame_internal(cpi);
2474 }
2475 }
2476