/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_util/aom_pthread.h"
#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/allintra_vis.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion_facade.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/partition_search.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

/*!\cond */
// This is used as a reference when computing the source variance for the
//  purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
//  which will be faster.
static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_AV1_HIGHBITDEPTH
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
#endif  // CONFIG_AV1_HIGHBITDEPTH
/*!\endcond */

// For the given bit depth, returns a constant array used to assist the
// calculation of source block variance, which will then be used to decide
// adaptive quantizers.
static const uint8_t *get_var_offs(int use_hbd, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    assert(bd == 8 || bd == 10 || bd == 12);
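    // bd can only be 8, 10 or 12 here, so (bd - 8) >> 1 maps it to the table
    // index 0, 1 or 2.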
    const int off_index = (bd - 8) >> 1;
    static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                                AV1_HIGH_VAR_OFFS_10,
                                                AV1_HIGH_VAR_OFFS_12 };
    return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
  }
#else
  (void)use_hbd;
  (void)bd;
  assert(!use_hbd);
#endif
  assert(bd == 8);
  return AV1_VAR_OFFS;
}

void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}

void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}

unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  const int subsampling_x = xd->plane[plane].subsampling_x;
  const int subsampling_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, subsampling_x, subsampling_y);
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
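  // Normalize by the block's pixel count: a rounded right shift by
  // log2(num_pels) turns the block variance into a per-pixel variance.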
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
}

unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  const int use_hbd = is_cur_buf_hbd(xd);
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

#if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each super
 * block based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi         Top level encoder instance structure
 * \param[in,out] td          Thread data structure
 * \param[in,out] x           Macro block level data for this block.
 * \param[in]     tile_info   Tile information / identification
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes  Number of image planes (e.g. Y,U,V)
 *
 * \remark No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static inline void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                 MACROBLOCK *const x,
                                 const TileInfo *const tile_info, int mi_row,
                                 int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  int current_qindex = cm->quant_params.base_qindex;
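  // Each delta-q mode below replaces the frame-level base qindex with a
  // superblock-specific qindex; if no mode applies, the base qindex is kept.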
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols =
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_VARIANCE_BOOST) {
    current_qindex = av1_get_sbq_variance_boost(cpi, x);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
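    // delta_lf_res is a power of two here, so adding half the step and
    // masking with ~(delta_lf_res - 1) rounds the derived delta to the
    // nearest multiple of delta_lf_res.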
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned for each block in the current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}

static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
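  // The TPL stats are indexed in superres (upscaled) MI units, so the column
  // positions and step are converted from coded resolution before the lookup.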
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

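  // Insertion sort: rank the non-LAST reference frames by ascending
  // accumulated inter cost, so the most useful references come first.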
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gain of this frame is less than 1/8 of
        // that of the previous, more relevant frame (or is zero), discard
        // this frame and all the frames ranked after it.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}

static inline void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
                                           int mi_row, int mi_col) {
  const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
  const int orig_rdmult = cpi->rd.RDMULT;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int gf_group_index = cpi->gf_frame_index;
  if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
      cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
      cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
    const int dr =
        av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
    x->rdmult = dr;
  }
}
#endif  // !CONFIG_REALTIME_ONLY

#if CONFIG_RT_ML_PARTITIONING
// Get a prediction (stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING

#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock with a pre-determined partition pattern; only minor
 * RD-based searches are allowed to adjust the initial pattern. It is only
 * used in real-time encoding.
 */
static inline void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, TokenExtra **tp,
                                   const int mi_row, const int mi_col,
                                   const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // set a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = cm->seq_params->sb_size;
    }
    if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
        cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
      bsize_select = cm->seq_params->sb_size;
      x->force_zeromv_skip_for_sb = 1;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    if (x->content_state_sb.source_sad_nonrd > kZeroSad)
      x->force_color_check_block_level = 1;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // set a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}

// This function initializes the stats for encode_rd_sb.
static inline void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

#if !CONFIG_REALTIME_ONLY
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // Pre-set the delta lf for the loop filter. Note that this value is set
    // before mi is assigned for each block in the current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}

static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  RD_STATS rdc_winner, cur_rdc;
  av1_invalid_rd_stats(&rdc_winner);

  int best_qindex = td->mb.rdmult_delta_qindex;
  const int start = cm->current_frame.frame_type == KEY_FRAME ? -20 : -12;
  const int end = cm->current_frame.frame_type == KEY_FRAME ? 20 : 12;
  const int step = cm->delta_q_info.delta_q_res;

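  // Dry-encode the superblock over a range of qindex deltas around the
  // default, in steps of delta_q_res; key frames are swept over a wider range.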
  for (int sweep_qp_delta = start; sweep_qp_delta <= end;
       sweep_qp_delta += step) {
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &cur_rdc, mi_row,
                                mi_col, sweep_qp_delta);

    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int backup_current_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = backup_current_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &cur_rdc, cur_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

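    // Keep the candidate with the lower RD cost; on an exact cost tie, prefer
    // the smaller absolute qindex delta.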
    if ((rdc_winner.rdcost > cur_rdc.rdcost) ||
        (abs(sweep_qp_delta) < abs(best_qindex - x->rdmult_delta_qindex) &&
         rdc_winner.rdcost == cur_rdc.rdcost)) {
      rdc_winner = cur_rdc;
      best_qindex = x->rdmult_delta_qindex + sweep_qp_delta;
    }
  }

  return best_qindex;
}
#endif  // !CONFIG_REALTIME_ONLY

/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock based on rate-distortion costs,
 * either from scratch or by adjusting a pre-calculated partition pattern.
 */
static inline void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, TokenExtra **tp,
                                const int mi_row, const int mi_col,
                                const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

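      // sb_qp_sweep() returns the best absolute delta qindex; subtracting the
      // default delta converts it into the offset to apply for this SB.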
      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}

// Check if the cost update frequencies for mode, coeff and dv symbols are
// set to tile level or off.
static inline int is_mode_coeff_dv_upd_freq_tile_or_off(
    const AV1_COMP *const cpi) {
  const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;

  return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
          cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
}

// When row-mt is enabled and cost update frequencies are set to off/tile,
// processing of the current SB can start even before processing of the
// top-right SB is finished. This function checks if it is sufficient to wait
// only for the top SB to finish processing before the current SB starts.
static inline int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
  const MODE mode = cpi->oxcf.mode;
  if (mode == GOOD) return 0;

  if (mode == ALLINTRA)
    return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
  else if (mode == REALTIME)
    return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
            cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
  else
    return 0;
}

/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
                                         int mi_col) {
  if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;

  const AV1_COMMON *const cm = &cpi->common;
  const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
                                   ? (cm->seq_params->mib_size >> 1)
                                   : cm->seq_params->mib_size;
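  // A 64x64 block spans mib_size MI units for 64x64 superblocks and half of
  // mib_size for 128x128 superblocks.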
  const int num_blk_64x64_cols =
      (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int num_blk_64x64_rows =
      (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
  const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
  const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
  uint64_t curr_sb_sad = UINT64_MAX;
  // Avoid the border as sad_blk_64x64 may not be set for the border
  // in the scene detection.
  if ((blk_64x64_row_index >= num_blk_64x64_rows - 1) ||
      (blk_64x64_col_index >= num_blk_64x64_cols - 1)) {
    return curr_sb_sad;
  }
  const uint64_t *const src_sad_blk_64x64_data =
      &cpi->src_sad_blk_64x64[blk_64x64_col_index +
                              blk_64x64_row_index * num_blk_64x64_cols];
  if (cm->seq_params->sb_size == BLOCK_128X128) {
    // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in
    // the superblock
    curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols] +
                  src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
  } else if (cm->seq_params->sb_size == BLOCK_64X64) {
    curr_sb_sad = src_sad_blk_64x64_data[0];
  }
  return curr_sb_sad;
}

/*!\brief Determine whether grading the source content can be skipped based on
 * the SAD stat
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
static inline bool is_calc_src_content_needed(AV1_COMP *cpi,
                                              MACROBLOCK *const x, int mi_row,
                                              int mi_col) {
  if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
    return true;
  const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
  if (curr_sb_sad == UINT64_MAX) return true;
  if (curr_sb_sad == 0) {
    x->content_state_sb.source_sad_nonrd = kZeroSad;
    return false;
  }
  AV1_COMMON *const cm = &cpi->common;
  bool do_calc_src_content = true;

  if (cpi->oxcf.speed < 9) return do_calc_src_content;

  // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
  if (AOMMIN(cm->width, cm->height) < 360) {
    // Derive the average 64x64 block source SAD from the SB source SAD
    const uint64_t avg_64x64_blk_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
                                                   : curr_sb_sad;

    // The thresholds are determined based on the kLowSad and kHighSad
    // thresholds and test results.
    uint64_t thresh_low = 15000;
    uint64_t thresh_high = 40000;

    if (cpi->sf.rt_sf.increase_source_sad_thresh) {
      thresh_low = thresh_low << 1;
      thresh_high = thresh_high << 1;
    }

    if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
      do_calc_src_content = false;
      // Note: set x->content_state_sb.source_sad_rd as well if this is
      // extended to the RTC rd path.
      x->content_state_sb.source_sad_nonrd = kMedSad;
    }
  }

  return do_calc_src_content;
}

/*!\brief Determine whether grading the source content is needed based on the
 * speed features and frame stats
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 */
// TODO(any): consolidate sfs to make interface cleaner
static inline void grade_source_content_sb(AV1_COMP *cpi, MACROBLOCK *const x,
                                           TileDataEnc *tile_data, int mi_row,
                                           int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  if (cm->current_frame.frame_type == KEY_FRAME ||
      (cpi->ppi->use_svc &&
       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
    assert(x->content_state_sb.source_sad_nonrd == kMedSad);
    assert(x->content_state_sb.source_sad_rd == kMedSad);
    return;
  }
  bool calc_src_content = false;

  if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
    if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
      calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
    } else {
      x->content_state_sb.source_sad_nonrd = kZeroSad;
    }
  } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
             (cm->width * cm->height <= 352 * 288)) {
    if (cpi->rc.frame_source_sad > 0)
      calc_src_content = true;
    else
      x->content_state_sb.source_sad_rd = kZeroSad;
  }
  if (calc_src_content)
    av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
}

/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling
 * up the width of the current tile.
 */
static inline void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, int mi_row,
                                 TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset the deltas for the quantizer and loop filters at the beginning of
  // every tile
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);
1172   // Code each SB in the row
1173   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
1174        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
1175     // In realtime/allintra mode and when frequency of cost updates is off/tile,
1176     // wait for the top superblock to finish encoding. Otherwise, wait for the
1177     // top-right superblock to finish encoding.
1178     enc_row_mt->sync_read_ptr(
1179         row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));
1180 
1181 #if CONFIG_MULTITHREAD
1182     if (row_mt_enabled) {
1183       pthread_mutex_lock(enc_row_mt->mutex_);
1184       const bool row_mt_exit = enc_row_mt->row_mt_exit;
1185       pthread_mutex_unlock(enc_row_mt->mutex_);
1186       // Exit in case any worker has encountered an error.
1187       if (row_mt_exit) return;
1188     }
1189 #endif
1190 
1191     const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
1192     if (update_cdf && (tile_info->mi_row_start != mi_row)) {
1193       if (tile_info->mi_col_start == mi_col) {
1194         // Restore the frame context at the first SB column of the tile
1195         memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
1196       } else {
1197         // update context
1198         int wt_left = AVG_CDF_WEIGHT_LEFT;
1199         int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
1200         if (tile_info->mi_col_end > (mi_col + mib_size))
1201           av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
1202                               wt_left, wt_tr);
1203         else
1204           av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
1205                               wt_left, wt_tr);
1206       }
1207     }
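    // Net effect of the block above in row-MT coding: xd->tile_ctx is
    // re-seeded per superblock, either restored verbatim from the saved row
    // context at the first column, or formed as a weighted average
    // (AVG_CDF_WEIGHT_LEFT vs AVG_CDF_WEIGHT_TOP_RIGHT) of the current left
    // context and the stored top-right row context, keeping CDF adaptation
    // deterministic across threads.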
1208 
1209     // Update the rate cost tables for some symbols
1210     av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
1211 
1212     // Reset color coding related parameters
1213     av1_zero(x->color_sensitivity_sb);
1214     av1_zero(x->color_sensitivity_sb_g);
1215     av1_zero(x->color_sensitivity_sb_alt);
1216     av1_zero(x->color_sensitivity);
1217     x->content_state_sb.source_sad_nonrd = kMedSad;
1218     x->content_state_sb.source_sad_rd = kMedSad;
1219     x->content_state_sb.lighting_change = 0;
1220     x->content_state_sb.low_sumdiff = 0;
1221     x->force_zeromv_skip_for_sb = 0;
1222     x->sb_me_block = 0;
1223     x->sb_me_partition = 0;
1224     x->sb_me_mv.as_int = 0;
1225     x->sb_force_fixed_part = 1;
1226     x->color_palette_thresh = 64;
1227     x->force_color_check_block_level = 0;
1228     x->nonrd_prune_ref_frame_search =
1229         cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1230 
1231     if (cpi->oxcf.mode == ALLINTRA) {
1232       x->intra_sb_rdmult_modifier = 128;
1233     }
1234 
1235     xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
1236     x->source_variance = UINT_MAX;
1237     td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
1238 
1239     // Get segment id and skip flag
1240     const struct segmentation *const seg = &cm->seg;
1241     int seg_skip = 0;
1242     if (seg->enabled) {
1243       const uint8_t *const map =
1244           seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
1245       const uint8_t segment_id =
1246           map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
1247               : 0;
1248       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
1249     }
1250 
1251     produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
1252 
1253     init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
1254                                         sb_size);
1255 
1256     // Grade the temporal variation of the SB; the grade will be used to decide
1257     // the fast mode search strategy for coding blocks
1258     if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1259 
1260     // encode the superblock
1261     if (use_nonrd_mode) {
1262       encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1263     } else {
1264       encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
1265     }
1266 
1267     // Update the top-right context in row_mt coding
1268     if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
1269       if (sb_cols_in_tile == 1)
1270         memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
1271       else if (sb_col_in_tile >= 1)
1272         memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
1273                sizeof(*xd->tile_ctx));
1274     }
1275     enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
1276                                sb_cols_in_tile);
1277   }
1278 
1279 #if CONFIG_COLLECT_COMPONENT_TIMING
1280   end_timing(cpi, encode_sb_row_time);
1281 #endif
1282 }
1283 
1284 static inline void init_encode_frame_mb_context(AV1_COMP *cpi) {
1285   AV1_COMMON *const cm = &cpi->common;
1286   const int num_planes = av1_num_planes(cm);
1287   MACROBLOCK *const x = &cpi->td.mb;
1288   MACROBLOCKD *const xd = &x->e_mbd;
1289 
1290   // Copy data over into macro block data structures.
1291   av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1292                        cm->seq_params->sb_size);
1293 
1294   av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1295                          cm->seq_params->subsampling_y, num_planes);
1296 }
1297 
1298 void av1_alloc_tile_data(AV1_COMP *cpi) {
1299   AV1_COMMON *const cm = &cpi->common;
1300   AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
1301   const int tile_cols = cm->tiles.cols;
1302   const int tile_rows = cm->tiles.rows;
1303 
1304   av1_row_mt_mem_dealloc(cpi);
1305 
1306   aom_free(cpi->tile_data);
1307   cpi->allocated_tiles = 0;
1308   enc_row_mt->allocated_tile_cols = 0;
1309   enc_row_mt->allocated_tile_rows = 0;
1310 
1311   CHECK_MEM_ERROR(
1312       cm, cpi->tile_data,
1313       aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
1314 
1315   cpi->allocated_tiles = tile_cols * tile_rows;
1316   enc_row_mt->allocated_tile_cols = tile_cols;
1317   enc_row_mt->allocated_tile_rows = tile_rows;
1318   for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
1319     for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
1320       const int tile_index = tile_row * tile_cols + tile_col;
1321       TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
1322       av1_zero(this_tile->row_mt_sync);
1323       this_tile->row_ctx = NULL;
1324     }
1325   }
1326 }
1327 
1328 void av1_init_tile_data(AV1_COMP *cpi) {
1329   AV1_COMMON *const cm = &cpi->common;
1330   const int num_planes = av1_num_planes(cm);
1331   const int tile_cols = cm->tiles.cols;
1332   const int tile_rows = cm->tiles.rows;
1333   int tile_col, tile_row;
1334   TokenInfo *const token_info = &cpi->token_info;
1335   TokenExtra *pre_tok = token_info->tile_tok[0][0];
1336   TokenList *tplist = token_info->tplist[0][0];
1337   unsigned int tile_tok = 0;
1338   int tplist_count = 0;
1339 
1340   if (!is_stat_generation_stage(cpi) &&
1341       cm->features.allow_screen_content_tools) {
1342     // Number of tokens for which token info needs to be allocated.
1343     unsigned int tokens_required =
1344         get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
1345                         MAX_SB_SIZE_LOG2, num_planes);
1346     // Allocate/reallocate memory for token related info if the number of tokens
1347     // required is more than the number of tokens already allocated. This could
1348     // occur in either of the following cases:
1349     // 1) The memory is not yet allocated.
1350     // 2) The frame dimensions have changed.
1351     const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
1352     if (realloc_tokens) {
1353       free_token_info(token_info);
1354       alloc_token_info(cm, token_info, tokens_required);
1355       pre_tok = token_info->tile_tok[0][0];
1356       tplist = token_info->tplist[0][0];
1357     }
1358   }
1359 
1360   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1361     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1362       TileDataEnc *const tile_data =
1363           &cpi->tile_data[tile_row * tile_cols + tile_col];
1364       TileInfo *const tile_info = &tile_data->tile_info;
1365       av1_tile_init(tile_info, cm, tile_row, tile_col);
1366       tile_data->firstpass_top_mv = kZeroMv;
1367       tile_data->abs_sum_level = 0;
1368 
1369       if (is_token_info_allocated(token_info)) {
1370         token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
1371         pre_tok = token_info->tile_tok[tile_row][tile_col];
1372         tile_tok = allocated_tokens(
1373             tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1374             num_planes);
1375         token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
1376         tplist = token_info->tplist[tile_row][tile_col];
1377         tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
1378       }
1379       tile_data->allow_update_cdf = !cm->tiles.large_scale;
1380       tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
1381                                     !cm->features.disable_cdf_update &&
1382                                     !delay_wait_for_top_right_sb(cpi);
1383       tile_data->tctx = *cm->fc;
1384     }
1385   }
1386 }
1387 
1388 // Populate the start palette token info prior to encoding an SB row.
1389 static inline void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1390                                    int tile_row, int tile_col, int mi_row,
1391                                    TokenExtra **tp) {
1392   const TokenInfo *token_info = &cpi->token_info;
1393   if (!is_token_info_allocated(token_info)) return;
1394 
1395   const AV1_COMMON *cm = &cpi->common;
1396   const int num_planes = av1_num_planes(cm);
1397   TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1398   const int sb_row_in_tile =
1399       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1400 
1401   get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1402                 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1403   assert(tplist != NULL);
1404   tplist[sb_row_in_tile].start = *tp;
1405 }
1406 
1407 // Populate the token count after encoding an SB row.
1408 static inline void populate_token_count(AV1_COMP *cpi,
1409                                         const TileInfo *tile_info, int tile_row,
1410                                         int tile_col, int mi_row,
1411                                         TokenExtra *tok) {
1412   const TokenInfo *token_info = &cpi->token_info;
1413   if (!is_token_info_allocated(token_info)) return;
1414 
1415   const AV1_COMMON *cm = &cpi->common;
1416   const int num_planes = av1_num_planes(cm);
1417   TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1418   const int sb_row_in_tile =
1419       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1420   const int tile_mb_cols =
1421       (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1422   const int num_mb_rows_in_sb =
1423       ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
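  // Worked example for the two conversions above: mi units are 4x4 pixels and
  // macroblocks are 16x16, so (mi span + 2) >> 2 converts the tile width from
  // mi units to whole MBs. For a 128x128 superblock (mib_size_log2 == 5), the
  // SB spans (1 << (5 + 2)) = 128 pixels, and (128 + 8) >> 4 = 8 MB rows.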
1424   tplist[sb_row_in_tile].count =
1425       (unsigned int)(tok - tplist[sb_row_in_tile].start);
1426 
1427   assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1428          get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1429                          cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1430                          num_planes));
1431 
1432   (void)num_planes;
1433   (void)tile_mb_cols;
1434   (void)num_mb_rows_in_sb;
1435 }
1436 
1437 /*!\brief Encode a superblock row
1438  *
1439  * \ingroup partition_search
1440  */
1441 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1442                        int tile_col, int mi_row) {
1443   AV1_COMMON *const cm = &cpi->common;
1444   const int tile_cols = cm->tiles.cols;
1445   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1446   const TileInfo *const tile_info = &this_tile->tile_info;
1447   TokenExtra *tok = NULL;
1448 
1449   get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1450 
1451   encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1452 
1453   populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1454 }
1455 
1456 /*!\brief Encode a tile
1457  *
1458  * \ingroup partition_search
1459  */
1460 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1461                      int tile_col) {
1462   AV1_COMMON *const cm = &cpi->common;
1463   TileDataEnc *const this_tile =
1464       &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1465   const TileInfo *const tile_info = &this_tile->tile_info;
1466 
1467   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1468 
1469   av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1470                          tile_info->mi_col_end, tile_row);
1471   av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1472                          &td->mb.e_mbd);
1473 
1474 #if !CONFIG_REALTIME_ONLY
1475   if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1476     cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1477 #endif
1478 
1479   if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1480     av1_crc32c_calculator_init(
1481         &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1482   }
1483 
1484   for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1485        mi_row += cm->seq_params->mib_size) {
1486     av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1487   }
1488   this_tile->abs_sum_level = td->abs_sum_level;
1489 }
1490 
1491 /*!\brief Break one frame into tiles and encode the tiles
1492  *
1493  * \ingroup partition_search
1494  *
1495  * \param[in]    cpi    Top-level encoder structure
1496  */
1497 static inline void encode_tiles(AV1_COMP *cpi) {
1498   AV1_COMMON *const cm = &cpi->common;
1499   const int tile_cols = cm->tiles.cols;
1500   const int tile_rows = cm->tiles.rows;
1501   int tile_col, tile_row;
1502 
1503   MACROBLOCK *const mb = &cpi->td.mb;
1504   assert(IMPLIES(cpi->tile_data == NULL,
1505                  cpi->allocated_tiles < tile_cols * tile_rows));
1506   if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1507 
1508   av1_init_tile_data(cpi);
1509   av1_alloc_mb_data(cpi, mb);
1510 
1511   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1512     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1513       TileDataEnc *const this_tile =
1514           &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1515       cpi->td.intrabc_used = 0;
1516       cpi->td.deltaq_used = 0;
1517       cpi->td.abs_sum_level = 0;
1518       cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1519       cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1520       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1521       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1522       av1_init_rtc_counters(&cpi->td.mb);
1523       cpi->td.mb.palette_pixels = 0;
1524       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1525       if (!frame_is_intra_only(&cpi->common))
1526         av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1527       cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1528       cpi->intrabc_used |= cpi->td.intrabc_used;
1529       cpi->deltaq_used |= cpi->td.deltaq_used;
1530     }
1531   }
1532 
1533   av1_dealloc_mb_data(mb, av1_num_planes(cm));
1534 }
1535 
1536 // Set the relative distance of a reference frame w.r.t. current frame
1537 static inline void set_rel_frame_dist(
1538     const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1539     const int ref_frame_flags) {
1540   MV_REFERENCE_FRAME ref_frame;
1541   int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1542   ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1543   ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1544   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1545     ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1546     if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1547       int dist = av1_encoder_get_relative_dist(
1548           cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1549           cm->current_frame.display_order_hint);
1550       ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1551       // Get the nearest ref_frame in the past
1552       if (abs(dist) < min_past_dist && dist < 0) {
1553         ref_frame_dist_info->nearest_past_ref = ref_frame;
1554         min_past_dist = abs(dist);
1555       }
1556       // Get the nearest ref_frame in the future
1557       if (dist < min_future_dist && dist > 0) {
1558         ref_frame_dist_info->nearest_future_ref = ref_frame;
1559         min_future_dist = dist;
1560       }
1561     }
1562   }
1563 }
1564 
1565 static inline int refs_are_one_sided(const AV1_COMMON *cm) {
1566   assert(!frame_is_intra_only(cm));
1567 
1568   int one_sided_refs = 1;
1569   const int cur_display_order_hint = cm->current_frame.display_order_hint;
1570   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1571     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1572     if (buf == NULL) continue;
1573     if (av1_encoder_get_relative_dist(buf->display_order_hint,
1574                                       cur_display_order_hint) > 0) {
1575       one_sided_refs = 0;  // bwd reference
1576       break;
1577     }
1578   }
1579   return one_sided_refs;
1580 }
1581 
1582 static inline void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1583                                              int ref_order_hint[2]) {
1584   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1585   ref_order_hint[0] = ref_order_hint[1] = 0;
1586   if (!skip_mode_info->skip_mode_allowed) return;
1587 
1588   const RefCntBuffer *const buf_0 =
1589       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1590   const RefCntBuffer *const buf_1 =
1591       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1592   assert(buf_0 != NULL && buf_1 != NULL);
1593 
1594   ref_order_hint[0] = buf_0->order_hint;
1595   ref_order_hint[1] = buf_1->order_hint;
1596 }
1597 
1598 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
1599   AV1_COMMON *const cm = &cpi->common;
1600 
1601   av1_setup_skip_mode_allowed(cm);
1602   if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
1603 
1604   // Turn off skip mode if the temporal distances of the reference pair to the
1605   // current frame are different by more than 1 frame.
1606   const int cur_offset = (int)cm->current_frame.order_hint;
1607   int ref_offset[2];
1608   get_skip_mode_ref_offsets(cm, ref_offset);
1609   const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
1610                                             cur_offset, ref_offset[0]);
1611   const int cur_to_ref1 = abs(get_relative_dist(
1612       &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
1613   if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
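  // Worked example: with order hints cur = 8, ref0 = 6, ref1 = 10, we get
  // cur_to_ref0 = 2 and cur_to_ref1 = |8 - 10| = 2, so |2 - 2| = 0 <= 1 and
  // skip mode stays enabled. With ref0 = 4 instead, cur_to_ref0 = 4 and
  // |4 - 2| = 2 > 1, so skip mode is turned off.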
1614 
1615   // High Latency: Turn off skip mode if all refs are fwd.
1616   if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;
1617 
1618   const int ref_frame[2] = {
1619     cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
1620     cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
1621   };
1622   if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
1623       !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
1624     return 0;
1625 
1626   return 1;
1627 }
1628 
1629 static inline void set_default_interp_skip_flags(
1630     const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1631   const int num_planes = av1_num_planes(cm);
1632   interp_search_flags->default_interp_skip_flags =
1633       (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1634                         : INTERP_SKIP_LUMA_SKIP_CHROMA;
1635 }
1636 
1637 static inline void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
1638   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
1639        cpi->sf.inter_sf.disable_onesided_comp) &&
1640       cpi->all_one_sided_refs) {
1641     // Disable all compound references
1642     cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
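    // Bit positions [REF_FRAMES, MODE_CTX_REF_FRAMES) index the compound
    // reference pairs, so this expression sets exactly those bits while
    // leaving the single-reference bits clear.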
1643   } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
1644              cpi->sf.inter_sf.selective_ref_frame >= 2) {
1645     AV1_COMMON *const cm = &cpi->common;
1646     const int cur_frame_display_order_hint =
1647         cm->current_frame.display_order_hint;
1648     unsigned int *ref_display_order_hint =
1649         cm->cur_frame->ref_display_order_hint;
1650     const int arf2_dist = av1_encoder_get_relative_dist(
1651         ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
1652         cur_frame_display_order_hint);
1653     const int bwd_dist = av1_encoder_get_relative_dist(
1654         ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
1655         cur_frame_display_order_hint);
1656 
1657     for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
1658       MV_REFERENCE_FRAME rf[2];
1659       av1_set_ref_frame(rf, ref_idx);
1660       if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
1661           !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
1662         continue;
1663       }
1664 
1665       if (!cpi->all_one_sided_refs) {
1666         int ref_dist[2];
1667         for (int i = 0; i < 2; ++i) {
1668           ref_dist[i] = av1_encoder_get_relative_dist(
1669               ref_display_order_hint[rf[i] - LAST_FRAME],
1670               cur_frame_display_order_hint);
1671         }
1672 
1673         // One-sided compound is used only when all reference frames are
1674         // one-sided.
1675         if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
1676           cpi->prune_ref_frame_mask |= 1 << ref_idx;
1677         }
1678       }
1679 
1680       if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
1681           (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
1682           (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
1683         // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
1684         if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
1685           // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
1686           // reference to the current frame than ALTREF2_FRAME
1687           cpi->prune_ref_frame_mask |= 1 << ref_idx;
1688         }
1689       }
1690     }
1691   }
1692 }
1693 
1694 static int allow_deltaq_mode(AV1_COMP *cpi) {
1695 #if !CONFIG_REALTIME_ONLY
1696   AV1_COMMON *const cm = &cpi->common;
1697   BLOCK_SIZE sb_size = cm->seq_params->sb_size;
1698   int sbs_wide = mi_size_wide[sb_size];
1699   int sbs_high = mi_size_high[sb_size];
1700 
1701   int64_t delta_rdcost = 0;
1702   for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
1703     for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
1704       int64_t this_delta_rdcost = 0;
1705       av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
1706                                      mi_row, mi_col);
1707       delta_rdcost += this_delta_rdcost;
1708     }
1709   }
1710   return delta_rdcost < 0;
1711 #else
1712   (void)cpi;
1713   return 1;
1714 #endif  // !CONFIG_REALTIME_ONLY
1715 }
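// Note on the gate above: DELTA_Q_OBJECTIVE remains enabled only when the rd
// cost deltas aggregated over all superblocks are negative, i.e. when
// modulating q per superblock is predicted to be a net win for the frame.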
1716 
1717 #define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1718 #define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1719 
1720 // Populates block level thresholds for force zeromv-skip decision
1721 static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1722   if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1723 
1724   // Threshold for forcing zeromv-skip decision is as below:
1725   // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1726   // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221,
1727   // allowing slightly higher error for smaller blocks.
1728   // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1729   // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1730   // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1731   // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1732   // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1733   // small blocks, the same is clipped to 4.
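  // Worked example: for BLOCK_64X64, 10000 * sqrt(4096 / 16384) = 5000; for
  // BLOCK_16X16, 10000 * sqrt(256 / 16384) = 1250, which exceeds the
  // per-pixel cap of 4 * 256 = 1024 and is therefore clipped to 1024.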
1734   const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1735   const int num_128x128_pix =
1736       block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1737 
1738   for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1739     const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1740 
1741     // Calculate the threshold for zeromv-skip decision based on area of the
1742     // partition
1743     unsigned int thresh_exit_part_blk =
1744         (unsigned int)(thresh_exit_128x128_part *
1745                            sqrt((double)num_block_pix / num_128x128_pix) +
1746                        0.5);
1747     thresh_exit_part_blk = AOMMIN(
1748         thresh_exit_part_blk,
1749         (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1750     cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1751   }
1752 }
1753 
1754 static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
1755                                     int8_t *is_block_same[2][3]) {
1756   for (int k = 0; k < 2; ++k) {
1757     for (int j = 0; j < 2; ++j) {
1758       aom_free(block_hash_values[k][j]);
1759     }
1760 
1761     for (int j = 0; j < 3; ++j) {
1762       aom_free(is_block_same[k][j]);
1763     }
1764   }
1765 }
1766 
1767 /*!\brief Determines delta_q_res value for Variance Boost modulation.
1768  */
1769 static int aom_get_variance_boost_delta_q_res(int qindex) {
1770   // Signaling delta_q changes across superblocks comes with inherent syntax
1771   // element overhead, which adds to the total payload size. This overhead
1772   // becomes proportionally bigger the higher the base qindex (i.e. lower
1773   // quality, smaller file size), so a balance needs to be struck.
1774   // - Smaller delta_q_res: more granular delta_q control, more bits spent
1775   // signaling deltas.
1776   // - Larger delta_q_res: coarser delta_q control, fewer bits spent signaling
1777   // deltas.
1778   //
1779   // At the same time, SB qindex fluctuations become larger the higher
1780   // the base qindex (between lowest and highest-variance regions):
1781   // - For QP 5: up to 8 qindexes
1782   // - For QP 60: up to 52 qindexes
1783   //
1784   // With these factors in mind, it was found that the best strategy that
1785   // maximizes quality per bitrate is by having very finely-grained delta_q
1786   // values for the lowest picture qindexes (to preserve tiny qindex SB deltas),
1787   // and progressively making them coarser as base qindex increases (to reduce
1788   // total signaling overhead).
1789   int delta_q_res = 1;
1790 
1791   if (qindex >= 160) {
1792     delta_q_res = 8;
1793   } else if (qindex >= 120) {
1794     delta_q_res = 4;
1795   } else if (qindex >= 80) {
1796     delta_q_res = 2;
1797   } else {
1798     delta_q_res = 1;
1799   }
1800 
1801   return delta_q_res;
1802 }
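// Usage note: a base qindex of 100 falls in the [80, 120) tier above and
// yields delta_q_res = 2. A minimal sketch (illustrative only; the helper
// name is hypothetical) of snapping a raw per-superblock qindex offset to
// the signalable grid implied by delta_q_res:
#if 0
static int snap_delta_q(int raw_delta, int delta_q_res) {
  // Truncate toward zero to a multiple of delta_q_res.
  return (raw_delta / delta_q_res) * delta_q_res;
}
#endif  // Illustrative sketch only.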
1803 
1804 /*!\brief Encoder setup (only for the current frame), encoding, and
1805  * reconstruction for a single frame
1806  *
1807  * \ingroup high_level_algo
1808  */
1809 static inline void encode_frame_internal(AV1_COMP *cpi) {
1810   ThreadData *const td = &cpi->td;
1811   MACROBLOCK *const x = &td->mb;
1812   AV1_COMMON *const cm = &cpi->common;
1813   CommonModeInfoParams *const mi_params = &cm->mi_params;
1814   FeatureFlags *const features = &cm->features;
1815   MACROBLOCKD *const xd = &x->e_mbd;
1816   RD_COUNTS *const rdc = &cpi->td.rd_counts;
1817 #if CONFIG_FPMT_TEST
1818   FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1819   FrameProbInfo *const temp_frame_probs_simulation =
1820       &cpi->ppi->temp_frame_probs_simulation;
1821 #endif
1822   FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1823   IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1824   MultiThreadInfo *const mt_info = &cpi->mt_info;
1825   AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1826   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1827   const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1828   int i;
1829 
1830   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1831     mi_params->setup_mi(mi_params);
1832   }
1833 
1834   set_mi_offsets(mi_params, xd, 0, 0);
1835 
1836   av1_zero(*td->counts);
1837   av1_zero(rdc->tx_type_used);
1838   av1_zero(rdc->obmc_used);
1839   av1_zero(rdc->warped_used);
1840   av1_zero(rdc->seg_tmp_pred_cost);
1841 
1842   // Reset the flag.
1843   cpi->intrabc_used = 0;
1844   // Need to disable intrabc when superres is selected
1845   if (av1_superres_scaled(cm)) {
1846     features->allow_intrabc = 0;
1847   }
1848 
1849   features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1850 
1851   if (features->allow_warped_motion &&
1852       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1853     const FRAME_UPDATE_TYPE update_type =
1854         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1855     int warped_probability =
1856 #if CONFIG_FPMT_TEST
1857         cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1858             ? temp_frame_probs->warped_probs[update_type]
1859             :
1860 #endif  // CONFIG_FPMT_TEST
1861             frame_probs->warped_probs[update_type];
1862     if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1863       features->allow_warped_motion = 0;
1864   }
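  // Example of the gating above: warped probabilities are tracked out of 128
  // (see the frame-probability update near the end of this function), so with
  // prune_warped_prob_thresh == 16, a learned probability of, say, 12 / 128
  // disables warped motion for this frame.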
1865 
1866   int hash_table_created = 0;
1867   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1868       !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1869     // TODO(any): move this outside of the recoding loop to avoid recalculating
1870     // the hash table.
1871     // add to hash table
1872     const int pic_width = cpi->source->y_crop_width;
1873     const int pic_height = cpi->source->y_crop_height;
1874     uint32_t *block_hash_values[2][2] = { { NULL } };
1875     int8_t *is_block_same[2][3] = { { NULL } };
1876     int k, j;
1877     bool error = false;
1878 
1879     for (k = 0; k < 2 && !error; ++k) {
1880       for (j = 0; j < 2; ++j) {
1881         block_hash_values[k][j] = (uint32_t *)aom_malloc(
1882             sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1883         if (!block_hash_values[k][j]) {
1884           error = true;
1885           break;
1886         }
1887       }
1888 
1889       for (j = 0; j < 3 && !error; ++j) {
1890         is_block_same[k][j] = (int8_t *)aom_malloc(
1891             sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1892         if (!is_block_same[k][j]) error = true;
1893       }
1894     }
1895 
1896     av1_hash_table_init(intrabc_hash_info);
1897     if (error ||
1898         !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1899       free_block_hash_buffers(block_hash_values, is_block_same);
1900       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1901                          "Error allocating intrabc_hash_table and buffers");
1902     }
1903     hash_table_created = 1;
1904     av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1905                                       block_hash_values[0], is_block_same[0]);
1906     // Hash data generated for screen contents is used for intraBC ME
1907     const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1908     const int max_sb_size =
1909         (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1910     int src_idx = 0;
1911     for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1912       const int dst_idx = !src_idx;
1913       av1_generate_block_hash_value(
1914           intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1915           block_hash_values[dst_idx], is_block_same[src_idx],
1916           is_block_same[dst_idx]);
1917       if (size >= min_alloc_size) {
1918         if (!av1_add_to_hash_map_by_row_with_precal_data(
1919                 &intrabc_hash_info->intrabc_hash_table,
1920                 block_hash_values[dst_idx], is_block_same[dst_idx][2],
1921                 pic_width, pic_height, size)) {
1922           error = true;
1923           break;
1924         }
1925       }
1926     }
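    // The loop above builds hashes bottom-up with ping-pong buffers: 2x2
    // hashes land in index 0, then each doubling of the block size reads from
    // src_idx and writes to dst_idx (4x4 into index 1, 8x8 back into index 0,
    // and so on), adding every level at or above min_alloc_size to the hash
    // map used for intraBC motion estimation.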
1927 
1928     free_block_hash_buffers(block_hash_values, is_block_same);
1929 
1930     if (error) {
1931       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1932                          "Error adding data to intrabc_hash_table");
1933     }
1934   }
1935 
1936   const CommonQuantParams *quant_params = &cm->quant_params;
1937   for (i = 0; i < MAX_SEGMENTS; ++i) {
1938     const int qindex =
1939         cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1940                         : quant_params->base_qindex;
1941     xd->lossless[i] =
1942         qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1943         quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1944         quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1945     if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1946     xd->qindex[i] = qindex;
1947     if (xd->lossless[i]) {
1948       cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1949     } else {
1950       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1951     }
1952   }
1953   features->coded_lossless = is_coded_lossless(cm, xd);
1954   features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1955 
1956   // Fix delta q resolution for the moment
1957 
1958   cm->delta_q_info.delta_q_res = 0;
1959   if (cpi->use_ducky_encode) {
1960     cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1961   } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1962     if (deltaq_mode == DELTA_Q_OBJECTIVE)
1963       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1964     else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1965       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1966     else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1967       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1968     else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1969       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1970     else if (deltaq_mode == DELTA_Q_HDR)
1971       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1972     else if (deltaq_mode == DELTA_Q_VARIANCE_BOOST)
1973       cm->delta_q_info.delta_q_res =
1974           aom_get_variance_boost_delta_q_res(quant_params->base_qindex);
1975     // Set delta_q_present_flag before it is used for the first time
1976     cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1977     cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1978 
1979     // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1980     // is used for ineligible frames; that will effectively turn off row_mt
1981     // usage. Note that objective delta_q and tpl-eligible frames are
1982     // currently only altref frames.
1983     const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1984     if (cm->delta_q_info.delta_q_present_flag) {
1985       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1986           gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1987         cm->delta_q_info.delta_q_present_flag = 0;
1988 
1989       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1990           cm->delta_q_info.delta_q_present_flag) {
1991         cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1992       }
1993     }
1994 
1995     // Reset delta_q_used flag
1996     cpi->deltaq_used = 0;
1997 
1998     cm->delta_q_info.delta_lf_present_flag =
1999         cm->delta_q_info.delta_q_present_flag &&
2000         oxcf->tool_cfg.enable_deltalf_mode;
2001     cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
2002 
2003     // update delta_q_present_flag and delta_lf_present_flag based on
2004     // base_qindex
2005     cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
2006     cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
2007   } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
2008              cpi->svc.number_temporal_layers == 1) {
2009     cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
2010     cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
2011   }
2012   cpi->rc.cnt_zeromv = 0;
2013 
2014   av1_frame_init_quantizer(cpi);
2015   init_encode_frame_mb_context(cpi);
2016   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
2017 
2018   if (cm->prev_frame && cm->prev_frame->seg.enabled)
2019     cm->last_frame_seg_map = cm->prev_frame->seg_map;
2020   else
2021     cm->last_frame_seg_map = NULL;
2022   if (features->allow_intrabc || features->coded_lossless) {
2023     av1_set_default_ref_deltas(cm->lf.ref_deltas);
2024     av1_set_default_mode_deltas(cm->lf.mode_deltas);
2025   } else if (cm->prev_frame) {
2026     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
2027     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
2028   }
2029   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
2030   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
2031 
2032   cpi->all_one_sided_refs =
2033       frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
2034 
2035   cpi->prune_ref_frame_mask = 0;
2036   // Figure out which ref frames can be skipped at frame level.
2037   setup_prune_ref_frame_mask(cpi);
2038 
2039   x->txfm_search_info.txb_split_count = 0;
2040 #if CONFIG_SPEED_STATS
2041   x->txfm_search_info.tx_search_count = 0;
2042 #endif  // CONFIG_SPEED_STATS
2043 
2044 #if !CONFIG_REALTIME_ONLY
2045 #if CONFIG_COLLECT_COMPONENT_TIMING
2046   start_timing(cpi, av1_compute_global_motion_time);
2047 #endif
2048   av1_compute_global_motion_facade(cpi);
2049 #if CONFIG_COLLECT_COMPONENT_TIMING
2050   end_timing(cpi, av1_compute_global_motion_time);
2051 #endif
2052 #endif  // !CONFIG_REALTIME_ONLY
2053 
2054 #if CONFIG_COLLECT_COMPONENT_TIMING
2055   start_timing(cpi, av1_setup_motion_field_time);
2056 #endif
2057   av1_calculate_ref_frame_side(cm);
2058   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2059 #if CONFIG_COLLECT_COMPONENT_TIMING
2060   end_timing(cpi, av1_setup_motion_field_time);
2061 #endif
2062 
2063   cm->current_frame.skip_mode_info.skip_mode_flag =
2064       check_skip_mode_enabled(cpi);
2065 
2066   // Initialization of skip mode cost depends on the value of
2067   // 'skip_mode_flag'. This initialization happens in the function
2068   // av1_fill_mode_rates(), which is in turn called in
2069   // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2070   // has to be called after 'skip_mode_flag' is initialized.
2071   av1_initialize_rd_consts(cpi);
2072   av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2073   populate_thresh_to_force_zeromv_skip(cpi);
2074 
2075   enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2076   enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2077   mt_info->row_mt_enabled = 0;
2078   mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2079                                        cm->tiles.cols * cm->tiles.rows) > 1;
2080 
2081   if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2082     mt_info->row_mt_enabled = 1;
2083     enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2084     enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2085     av1_encode_tiles_row_mt(cpi);
2086   } else {
2087     if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2088       av1_encode_tiles_mt(cpi);
2089     } else {
2090       // Preallocate the pc_tree for realtime coding to reduce the cost of
2091       // memory allocation.
2092       const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2093       if (use_nonrd_mode) {
2094         td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2095         if (!td->pc_root)
2096           aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2097                              "Failed to allocate PC_TREE");
2098       } else {
2099         td->pc_root = NULL;
2100       }
2101 
2102       encode_tiles(cpi);
2103       av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2104                                  cpi->sf.part_sf.partition_search_type);
2105       td->pc_root = NULL;
2106     }
2107   }
2108 
2109   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2110   if (features->allow_intrabc && !cpi->intrabc_used) {
2111     features->allow_intrabc = 0;
2112   }
2113   if (features->allow_intrabc) {
2114     cm->delta_q_info.delta_lf_present_flag = 0;
2115   }
2116 
2117   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2118     cm->delta_q_info.delta_q_present_flag = 0;
2119   }
2120 
2121   // Set the transform size appropriately before bitstream creation
2122   const MODE_EVAL_TYPE eval_type =
2123       cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2124           ? WINNER_MODE_EVAL
2125           : DEFAULT_EVAL;
2126   const TX_SIZE_SEARCH_METHOD tx_search_type =
2127       cpi->winner_mode_params.tx_size_search_methods[eval_type];
2128   assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2129   features->tx_mode = select_tx_mode(cm, tx_search_type);
2130 
2131   // Retain the frame level probability update conditions for parallel frames.
2132   // These conditions will be consumed during postencode stage to update the
2133   // probability.
2134   if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2135     cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2136         cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2137     cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2138         (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2139          cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2140     cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2141         (features->allow_warped_motion &&
2142          cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2143     cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2144         (cm->current_frame.frame_type != KEY_FRAME &&
2145          cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2146          features->interp_filter == SWITCHABLE);
2147   }
2148 
2149   if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2150       ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2151         INT_MAX) &&
2152        (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2153     const FRAME_UPDATE_TYPE update_type =
2154         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2155     for (i = 0; i < TX_SIZES_ALL; i++) {
2156       int sum = 0;
2157       int j;
2158       int left = MAX_TX_TYPE_PROB;
2159 
2160       for (j = 0; j < TX_TYPES; j++)
2161         sum += cpi->td.rd_counts.tx_type_used[i][j];
2162 
2163       for (j = TX_TYPES - 1; j >= 0; j--) {
2164         int update_txtype_frameprobs = 1;
2165         const int new_prob =
2166             sum ? (int)((int64_t)MAX_TX_TYPE_PROB *
2167                         cpi->td.rd_counts.tx_type_used[i][j] / sum)
2168                 : (j ? 0 : MAX_TX_TYPE_PROB);
2169 #if CONFIG_FPMT_TEST
2170         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2171           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2172               0) {
2173             int prob =
2174                 (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2175                  new_prob) >>
2176                 1;
2177             left -= prob;
2178             if (j == 0) prob += left;
2179             temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2180                 prob;
2181             // Copy temp_frame_probs_simulation to temp_frame_probs
2182             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2183                  update_type_idx++) {
2184               temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2185                   temp_frame_probs_simulation
2186                       ->tx_type_probs[update_type_idx][i][j];
2187             }
2188           }
2189           update_txtype_frameprobs = 0;
2190         }
2191 #endif  // CONFIG_FPMT_TEST
2192         // Track the frame probabilities of parallel encode frames to update
2193         // during postencode stage.
2194         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2195           update_txtype_frameprobs = 0;
2196           cpi->frame_new_probs[cpi->num_frame_recode]
2197               .tx_type_probs[update_type][i][j] = new_prob;
2198         }
2199         if (update_txtype_frameprobs) {
2200           int prob =
2201               (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2202           left -= prob;
2203           if (j == 0) prob += left;
2204           frame_probs->tx_type_probs[update_type][i][j] = prob;
2205         }
2206       }
2207     }
2208   }
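  // Worked example of the update above (assuming MAX_TX_TYPE_PROB is 1024):
  // if a tx size saw sum = 200 uses and tx type j was chosen 50 times, then
  // new_prob = 1024 * 50 / 200 = 256; with a previous frame probability of
  // 300, the stored value becomes (300 + 256) >> 1 = 278. The running "left"
  // counter folds any rounding remainder into the j == 0 entry so each row
  // still sums to the probability total. The obmc, warped, and interp-filter
  // updates below follow the same halving recurrence.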
2209 
2210   if (cm->seg.enabled) {
2211     cm->seg.temporal_update = 1;
2212     if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2213       cm->seg.temporal_update = 0;
2214   }
2215 
2216   if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2217       cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2218     const FRAME_UPDATE_TYPE update_type =
2219         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2220 
2221     for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2222       int sum = 0;
2223       int update_obmc_frameprobs = 1;
2224       for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2225 
2226       const int new_prob =
2227           sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2228 #if CONFIG_FPMT_TEST
2229       if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2230         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2231           temp_frame_probs_simulation->obmc_probs[update_type][i] =
2232               (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2233                new_prob) >>
2234               1;
2235           // Copy temp_frame_probs_simulation to temp_frame_probs
2236           for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2237                update_type_idx++) {
2238             temp_frame_probs->obmc_probs[update_type_idx][i] =
2239                 temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2240           }
2241         }
2242         update_obmc_frameprobs = 0;
2243       }
2244 #endif  // CONFIG_FPMT_TEST
2245       // Track the frame probabilities of parallel encode frames to update
2246       // during postencode stage.
2247       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2248         update_obmc_frameprobs = 0;
2249         cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2250             new_prob;
2251       }
2252       if (update_obmc_frameprobs) {
2253         frame_probs->obmc_probs[update_type][i] =
2254             (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2255       }
2256     }
2257   }
2258 
2259   if (features->allow_warped_motion &&
2260       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2261     const FRAME_UPDATE_TYPE update_type =
2262         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2263     int update_warp_frameprobs = 1;
2264     int sum = 0;
2265     for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2266     const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2267 #if CONFIG_FPMT_TEST
2268     if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2269       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2270         temp_frame_probs_simulation->warped_probs[update_type] =
2271             (temp_frame_probs_simulation->warped_probs[update_type] +
2272              new_prob) >>
2273             1;
2274         // Copy temp_frame_probs_simulation to temp_frame_probs
2275         for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2276              update_type_idx++) {
2277           temp_frame_probs->warped_probs[update_type_idx] =
2278               temp_frame_probs_simulation->warped_probs[update_type_idx];
2279         }
2280       }
2281       update_warp_frameprobs = 0;
2282     }
2283 #endif  // CONFIG_FPMT_TEST
2284     // Track the frame probabilities of parallel encode frames to update
2285     // during postencode stage.
2286     if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2287       update_warp_frameprobs = 0;
2288       cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2289           new_prob;
2290     }
2291     if (update_warp_frameprobs) {
2292       frame_probs->warped_probs[update_type] =
2293           (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2294     }
2295   }
2296 
2297   if (cm->current_frame.frame_type != KEY_FRAME &&
2298       cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2299       features->interp_filter == SWITCHABLE) {
2300     const FRAME_UPDATE_TYPE update_type =
2301         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2302 
2303     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2304       int sum = 0;
2305       int j;
2306       int left = 1536;
2307 
2308       for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2309         sum += cpi->td.counts->switchable_interp[i][j];
2310       }
2311 
2312       for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2313         int update_interpfilter_frameprobs = 1;
2314         const int new_prob =
2315             sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2316                 : (j ? 0 : 1536);
2317 #if CONFIG_FPMT_TEST
2318         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2319           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2320               0) {
2321             int prob = (temp_frame_probs_simulation
2322                             ->switchable_interp_probs[update_type][i][j] +
2323                         new_prob) >>
2324                        1;
2325             left -= prob;
2326             if (j == 0) prob += left;
2327             temp_frame_probs_simulation
2328                 ->switchable_interp_probs[update_type][i][j] = prob;
2329             // Copy temp_frame_probs_simulation to temp_frame_probs
2330             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2331                  update_type_idx++) {
2332               temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2333                   temp_frame_probs_simulation
2334                       ->switchable_interp_probs[update_type_idx][i][j];
2335             }
2336           }
2337           update_interpfilter_frameprobs = 0;
2338         }
2339 #endif  // CONFIG_FPMT_TEST
2340         // Track the frame probabilities of parallel encode frames to update
2341         // during postencode stage.
2342         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2343           update_interpfilter_frameprobs = 0;
2344           cpi->frame_new_probs[cpi->num_frame_recode]
2345               .switchable_interp_probs[update_type][i][j] = new_prob;
2346         }
2347         if (update_interpfilter_frameprobs) {
2348           int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2349                       new_prob) >>
2350                      1;
2351           left -= prob;
2352           if (j == 0) prob += left;
2353           frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2354         }
2355       }
2356     }
2357   }
2358   if (hash_table_created) {
2359     av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2360   }
2361 }
2362 
2363 /*!\brief Setup reference frame buffers and encode a frame
2364  *
2365  * \ingroup high_level_algo
2366  * \callgraph
2367  * \callergraph
2368  *
2369  * \param[in]    cpi    Top-level encoder structure
2370  */
2371 void av1_encode_frame(AV1_COMP *cpi) {
2372   AV1_COMMON *const cm = &cpi->common;
2373   CurrentFrame *const current_frame = &cm->current_frame;
2374   FeatureFlags *const features = &cm->features;
2375   RD_COUNTS *const rdc = &cpi->td.rd_counts;
2376   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
2377   // Indicates whether or not to use a default reduced set for ext-tx
2378   // rather than the potential full set of 16 transforms
2379   features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
2380 
2381   // Make sure segment_id is no larger than last_active_segid.
2382   if (cm->seg.enabled && cm->seg.update_map) {
2383     const int mi_rows = cm->mi_params.mi_rows;
2384     const int mi_cols = cm->mi_params.mi_cols;
2385     const int last_active_segid = cm->seg.last_active_segid;
2386     uint8_t *map = cpi->enc_seg.map;
2387     for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
2388       for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
2389         map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
2390       }
2391       map += mi_cols;
2392     }
2393   }
2394 
2395   av1_setup_frame_buf_refs(cm);
2396   enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
2397                          cm->cur_frame->ref_display_order_hint,
2398                          cm->current_frame.display_order_hint);
2399   set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
2400                      cpi->ref_frame_flags);
2401   av1_setup_frame_sign_bias(cm);
2402 
2403   // If global motion is enabled, then every buffer which is used as either
2404   // a source or a ref frame should have an image pyramid allocated.
2405   // Check here so that issues can be caught early in debug mode
2406 #if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2407   if (cpi->alloc_pyramid) {
2408     assert(cpi->source->y_pyramid);
2409     for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2410       const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
2411       if (buf != NULL) {
2412         assert(buf->buf.y_pyramid);
2413       }
2414     }
2415   }
2416 #endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
2417 
2418 #if CONFIG_MISMATCH_DEBUG
2419   mismatch_reset_frame(av1_num_planes(cm));
2420 #endif
2421 
2422   rdc->newmv_or_intra_blocks = 0;
2423   cpi->palette_pixel_num = 0;
2424 
2425   if (cpi->sf.hl_sf.frame_parameter_update ||
2426       cpi->sf.rt_sf.use_comp_ref_nonrd) {
2427     if (frame_is_intra_only(cm))
2428       current_frame->reference_mode = SINGLE_REFERENCE;
2429     else
2430       current_frame->reference_mode = REFERENCE_MODE_SELECT;
2431 
2432     features->interp_filter = SWITCHABLE;
2433     if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
2434 
2435     features->switchable_motion_mode = is_switchable_motion_mode_allowed(
2436         features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
2437 
2438     rdc->compound_ref_used_flag = 0;
2439     rdc->skip_mode_used_flag = 0;
2440 
2441     encode_frame_internal(cpi);
2442 
2443     if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
2444       // Use a flag that includes 4x4 blocks
2445       if (rdc->compound_ref_used_flag == 0) {
2446         current_frame->reference_mode = SINGLE_REFERENCE;
2447 #if CONFIG_ENTROPY_STATS
2448         av1_zero(cpi->td.counts->comp_inter);
2449 #endif  // CONFIG_ENTROPY_STATS
2450       }
2451     }
2452     // Re-check the skip mode status, as the reference mode may have been
2453     // changed.
2454     SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
2455     if (frame_is_intra_only(cm) ||
2456         current_frame->reference_mode == SINGLE_REFERENCE) {
2457       skip_mode_info->skip_mode_allowed = 0;
2458       skip_mode_info->skip_mode_flag = 0;
2459     }
2460     if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
2461       skip_mode_info->skip_mode_flag = 0;
2462 
2463     if (!cm->tiles.large_scale) {
2464       if (features->tx_mode == TX_MODE_SELECT &&
2465           cpi->td.mb.txfm_search_info.txb_split_count == 0)
2466         features->tx_mode = TX_MODE_LARGEST;
2467     }
2468   } else {
2469     // This is needed if real-time speed setting is changed on the fly
2470     // from one using compound prediction to one using single reference.
2471     if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
2472       current_frame->reference_mode = SINGLE_REFERENCE;
2473     encode_frame_internal(cpi);
2474   }
2475 }
2476