• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <float.h>
14 #include <math.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17 
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 #include "config/av1_rtcd.h"
21 
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/binary_codes_writer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/aom_timer.h"
26 #include "aom_util/aom_pthread.h"
27 #if CONFIG_MISMATCH_DEBUG
28 #include "aom_util/debug_util.h"
29 #endif  // CONFIG_MISMATCH_DEBUG
30 
31 #include "av1/common/cfl.h"
32 #include "av1/common/common.h"
33 #include "av1/common/common_data.h"
34 #include "av1/common/entropy.h"
35 #include "av1/common/entropymode.h"
36 #include "av1/common/idct.h"
37 #include "av1/common/mv.h"
38 #include "av1/common/mvref_common.h"
39 #include "av1/common/pred_common.h"
40 #include "av1/common/quant_common.h"
41 #include "av1/common/reconintra.h"
42 #include "av1/common/reconinter.h"
43 #include "av1/common/seg_common.h"
44 #include "av1/common/tile_common.h"
45 #include "av1/common/warped_motion.h"
46 
47 #include "av1/encoder/allintra_vis.h"
48 #include "av1/encoder/aq_complexity.h"
49 #include "av1/encoder/aq_cyclicrefresh.h"
50 #include "av1/encoder/aq_variance.h"
51 #include "av1/encoder/global_motion_facade.h"
52 #include "av1/encoder/encodeframe.h"
53 #include "av1/encoder/encodeframe_utils.h"
54 #include "av1/encoder/encodemb.h"
55 #include "av1/encoder/encodemv.h"
56 #include "av1/encoder/encodetxb.h"
57 #include "av1/encoder/ethread.h"
58 #include "av1/encoder/extend.h"
59 #include "av1/encoder/intra_mode_search_utils.h"
60 #include "av1/encoder/ml.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/partition_strategy.h"
63 #if !CONFIG_REALTIME_ONLY
64 #include "av1/encoder/partition_model_weights.h"
65 #endif
66 #include "av1/encoder/partition_search.h"
67 #include "av1/encoder/rd.h"
68 #include "av1/encoder/rdopt.h"
69 #include "av1/encoder/reconinter_enc.h"
70 #include "av1/encoder/segmentation.h"
71 #include "av1/encoder/tokenize.h"
72 #include "av1/encoder/tpl_model.h"
73 #include "av1/encoder/var_based_part.h"
74 
75 #if CONFIG_TUNE_VMAF
76 #include "av1/encoder/tune_vmaf.h"
77 #endif
78 
79 /*!\cond */
80 // This is used as a reference when computing the source variance for the
81 //  purposes of activity masking.
82 // Eventually this should be replaced by custom no-reference routines,
83 //  which will be faster.
84 static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
85   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
86   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
93   128, 128, 128, 128, 128, 128, 128, 128
94 };
95 
96 #if CONFIG_AV1_HIGHBITDEPTH
97 static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
98   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
99   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
105   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
106   128, 128, 128, 128, 128, 128, 128, 128
107 };
108 
109 static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
110   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
111   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
124   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
125   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
126 };
127 
128 static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
129   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
130   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
146   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
147   128 * 16, 128 * 16
148 };
149 #endif  // CONFIG_AV1_HIGHBITDEPTH
150 /*!\endcond */
151 
152 // For the given bit depth, returns a constant array used to assist the
153 // calculation of source block variance, which will then be used to decide
154 // adaptive quantizers.
get_var_offs(int use_hbd,int bd)155 static const uint8_t *get_var_offs(int use_hbd, int bd) {
156 #if CONFIG_AV1_HIGHBITDEPTH
157   if (use_hbd) {
158     assert(bd == 8 || bd == 10 || bd == 12);
159     const int off_index = (bd - 8) >> 1;
160     static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
161                                                 AV1_HIGH_VAR_OFFS_10,
162                                                 AV1_HIGH_VAR_OFFS_12 };
163     return CONVERT_TO_BYTEPTR(high_var_offs[off_index]);
164   }
165 #else
166   (void)use_hbd;
167   (void)bd;
168   assert(!use_hbd);
169 #endif
170   assert(bd == 8);
171   return AV1_VAR_OFFS;
172 }
173 
// Resets the per-superblock RTC statistics tracked on the macroblock:
// the cyclic-refresh counters and the zero-motion-vector block count.
void av1_init_rtc_counters(MACROBLOCK *const x) {
  av1_init_cyclic_refresh_counters(x);
  x->cnt_zeromv = 0;
}
178 
// Folds the macroblock's RTC counters into the encoder-level totals.
void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
  // Cyclic-refresh stats are only meaningful when that AQ mode is active.
  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
    av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
  }
  // Accumulate the zero-mv block count into the rate-control state.
  cpi->rc.cnt_zeromv += x->cnt_zeromv;
}
184 
// Computes the per-pixel variance of the given plane's source block against
// a flat mid-gray reference (see get_var_offs).
unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
                                       const MACROBLOCKD *xd,
                                       const struct buf_2d *ref,
                                       BLOCK_SIZE bsize, int plane,
                                       int use_hbd) {
  // Map the luma block size to this plane's size, honoring subsampling.
  const int ss_x = xd->plane[plane].subsampling_x;
  const int ss_y = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
  unsigned int sse;
  const unsigned int block_var = cpi->ppi->fn_ptr[plane_bsize].vf(
      ref->buf, ref->stride, get_var_offs(use_hbd, xd->bd), 0, &sse);
  // Normalize the block variance to a per-pixel figure.
  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[plane_bsize]);
}
199 
// Convenience wrapper around av1_get_perpixel_variance that derives the
// high-bit-depth flag from the current buffer.
unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
                                              const MACROBLOCKD *xd,
                                              const struct buf_2d *ref,
                                              BLOCK_SIZE bsize, int plane) {
  return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane,
                                   is_cur_buf_hbd(xd));
}
207 
// Points each macroblock source plane at the corresponding region of the
// given source frame buffer for the block at (mi_row, mi_col).
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // Clamp to MAX_MB_PLANE so static analysis can prove the plane-array
  // accesses below are in bounds.
  const int plane_count = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < plane_count; ++plane) {
    // Planes 1 and 2 (chroma) share the UV geometry of the source buffer.
    const int is_uv = plane > 0;
    setup_pred_plane(&x->plane[plane].src, bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, NULL,
                     x->e_mbd.plane[plane].subsampling_x,
                     x->e_mbd.plane[plane].subsampling_y);
  }
}
224 
225 #if !CONFIG_REALTIME_ONLY
226 /*!\brief Assigns different quantization parameters to each super
227  * block based on its TPL weight.
228  *
229  * \ingroup tpl_modelling
230  *
231  * \param[in]     cpi         Top level encoder instance structure
232  * \param[in,out] td          Thread data structure
233  * \param[in,out] x           Macro block level data for this block.
 * \param[in]     tile_info   Tile information / identification
235  * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
236  * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes  Number of image planes (e.g. Y,U,V)
238  *
239  * \remark No return value but updates macroblock and thread data
240  * related to the q / q delta to be used.
241  */
static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                     MACROBLOCK *const x,
                                     const TileInfo *const tile_info,
                                     int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // Callers must only invoke this when delta-q signalling is enabled.
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  // Start from the frame-level base qindex; the mode-specific branches
  // below override it per superblock.
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.qp_mode ==
                                   DUCKY_ENCODE_FRAME_MODE_QINDEX) {
    // Externally supplied per-superblock qindex: look it up by the
    // superblock's position in raster order.
    const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
    const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
    const int sb_cols =
        CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
    const int sb_index = sb_row * sb_cols + sb_col;
    current_qindex =
        cpi->ducky_encode_info.frame_info.superblock_encode_qindex[sb_index];
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    // Perceptual mode: derive qindex from an energy/variance measure of
    // the source superblock.
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, td, NULL, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.enable_hdr_deltaq) {
    current_qindex = av1_get_q_for_hdr(cpi, x, sb_size, mi_row, mi_col);
  }

  x->rdmult_cur_qindex = current_qindex;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the qindex so its delta from the current base is representable at
  // the signalled delta_q_res resolution.
  const int adjusted_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);
  if (cpi->use_ducky_encode) {
    assert(adjusted_qindex == current_qindex);
  }
  current_qindex = adjusted_qindex;

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  x->rdmult_delta_qindex = x->delta_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    // Derive a loop-filter delta from the q delta, rounded to delta_lf_res
    // and clamped to the legal loop-filter range.
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}
335 
// Uses the frame's TPL statistics to prune the reference frames considered
// for the current superblock: per-reference prediction errors are
// accumulated over the superblock, references are ranked by that cost, and
// weak ones are excluded via x->tpl_keep_ref_frame.
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  // Leave all keep-flags 0 when TPL stats are unavailable or not applicable
  // to this frame.
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    // Overlay frames keep every reference.
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  // Superblock bounds in mi units. Columns are converted to superres
  // coordinates since the TPL stats are indexed at upscaled resolution.
  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

  // Insertion-sort the non-LAST references by accumulated cost (ascending),
  // so rank_index[0] is the most useful reference.
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  // INTRA and LAST are always kept.
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gains are smaller than the previous more
        // relevant frame over certain amount, discard this frame and all the
        // frames afterwards.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}
436 
adjust_rdmult_tpl_model(AV1_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)437 static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
438                                                int mi_row, int mi_col) {
439   const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
440   const int orig_rdmult = cpi->rd.RDMULT;
441 
442   assert(IMPLIES(cpi->ppi->gf_group.size > 0,
443                  cpi->gf_frame_index < cpi->ppi->gf_group.size));
444   const int gf_group_index = cpi->gf_frame_index;
445   if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
446       cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
447       cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
448     const int dr =
449         av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
450     x->rdmult = dr;
451   }
452 }
453 #endif  // !CONFIG_REALTIME_ONLY
454 
455 #if CONFIG_RT_ML_PARTITIONING
456 // Get a prediction(stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    // Inter frame: build a zero-mv, BILINEAR-filtered prediction of the
    // whole 64x64 superblock from LAST_FRAME into x->est_pred.
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    // Temporarily retarget the luma dst buffer at est_pred so the inter
    // predictor writes the estimate there.
    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
    // Key frame: fill the estimate with a mid-gray value.
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        // NOTE(review): memset converts its fill value to unsigned char, so
        // 128 * 4 (512) truncates to 0 — this fills with zeros rather than
        // 10-bit mid-gray. An element-wise fill (e.g. aom_memset16) looks
        // intended; confirm before relying on this path.
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        // NOTE(review): same truncation — 128 * 16 (2048) becomes 0.
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
503 #endif  // CONFIG_RT_ML_PARTITIONING
504 
505 #define AVG_CDF_WEIGHT_LEFT 3
506 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
507 
508 /*!\brief Encode a superblock (minimal RD search involved)
509  *
510  * \ingroup partition_search
511  * Encodes the superblock by a pre-determined partition pattern, only minor
512  * rd-based searches are allowed to adjust the initial pattern. It is only used
513  * by realtime encoding.
514  */
static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                       TileDataEnc *tile_data, TokenExtra **tp,
                                       const int mi_row, const int mi_col,
                                       const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  // mi points at this superblock's top-left entry in the mode-info grid.
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  PC_TREE *const pc_root = td->pc_root;

#if CONFIG_RT_ML_PARTITIONING
  // ML-based partitioning takes its own path and returns early.
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
      (sf->rt_sf.use_fast_fixed_part && x->sb_force_fixed_part == 1 &&
       (!frame_is_intra_only(cm) &&
        (!cpi->ppi->use_svc ||
         !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)))) {
    // set a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    BLOCK_SIZE bsize_select = sf->part_sf.fixed_partition_size;
    // Low-motion-content superblocks get a large fixed partition.
    if (sf->rt_sf.use_fast_fixed_part &&
        x->content_state_sb.source_sad_nonrd < kLowSad) {
      bsize_select = BLOCK_64X64;
    }
    const BLOCK_SIZE bsize = seg_skip ? sb_size : bsize_select;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // set a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  // Only the two partition-selection schemes above are supported here.
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
      }
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, nonrd_use_partition_time);
#endif
  // Encode the superblock with the partition chosen above.
  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, nonrd_use_partition_time);
#endif
}
584 
585 // This function initializes the stats for encode_rd_sb.
// Initializes per-superblock state (simple-motion MVs, TPL-based reference
// pruning and delta-q, txfm-search scratch) before the RD partition search.
static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Simple-motion MVs are only needed when some simple-motion-search-based
  // speed feature is active, and never for intra-only frames.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  // Skip the TPL/delta-q setup for pure one-pass realtime encoding.
  if (!(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
        cpi->oxcf.gf_cfg.lag_in_frames == 0)) {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }

      // TODO(jingning): revisit this function.
      // The `(0)` deliberately disables this path pending the TODO above.
      if (cpi->oxcf.algo_cfg.enable_tpl_model && (0)) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset per-superblock search state and mark rd_cost invalid so the
  // partition search starts from a clean slate.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
640 
641 #if !CONFIG_REALTIME_ONLY
// Initializes the quantizers (and, when enabled, the per-block loop-filter
// deltas) for encoding the superblock at (mi_row, mi_col) with a delta-qp
// offset of delta_qp_ofs relative to the SB's rdmult qindex. Also resets the
// per-SB search state and invalidates *rd_cost so the subsequent partition
// search starts from a clean slate.
static void sb_qp_sweep_init_quantizers(AV1_COMP *cpi, ThreadData *td,
                                        const TileDataEnc *tile_data,
                                        SIMPLE_MOTION_DATA_TREE *sms_tree,
                                        RD_STATS *rd_cost, int mi_row,
                                        int mi_col, int delta_qp_ofs) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const TileInfo *tile_info = &tile_data->tile_info;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // Caller guarantees delta-q signaling is on (this is the qp-sweep path).
  assert(delta_q_info->delta_q_present_flag);
  const int delta_q_res = delta_q_info->delta_q_res;

  const SPEED_FEATURES *sf = &cpi->sf;
  // Simple-motion-search MVs are only needed when one of these partition
  // speed features is active, and only for frames with inter blocks.
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_tree,
                                             mi_row, mi_col);
  }

  int current_qindex = x->rdmult_cur_qindex + delta_qp_ofs;

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the requested qindex to a value representable at the stream's
  // delta-q resolution, relative to the current base qindex.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;

  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id, 0);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    // Derive the loop-filter delta from the qindex delta, rounded to the
    // delta-lf resolution, then clamp to the legal loop-filter range.
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    // Chroma LF components only exist for multi-plane frames.
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }

  // Reset per-SB search state for the upcoming partition search.
  x->reuse_inter_pred = false;
  x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
  reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
713 
// Sweeps delta-qp offsets around the superblock's rdmult qindex, running a
// dry-pass partition search at each offset, and returns the qindex that
// achieved the best (lowest) rd cost for this SB. Ties are broken in favor
// of the smaller absolute delta.
static int sb_qp_sweep(AV1_COMP *const cpi, ThreadData *td,
                       TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
                       int mi_col, BLOCK_SIZE bsize,
                       SIMPLE_MOTION_DATA_TREE *sms_tree,
                       SB_FIRST_PASS_STATS *sb_org_stats) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;

  RD_STATS best_rdc;
  av1_invalid_rd_stats(&best_rdc);
  int best_qindex = td->mb.rdmult_delta_qindex;

  // Key frames are swept over a wider delta-qp range than inter frames.
  const int is_key = cm->current_frame.frame_type == KEY_FRAME;
  const int sweep_min = is_key ? -20 : -12;
  const int sweep_max = is_key ? 20 : 12;
  const int sweep_step = cm->delta_q_info.delta_q_res;

  for (int qp_delta = sweep_min; qp_delta <= sweep_max;
       qp_delta += sweep_step) {
    RD_STATS this_rdc;
    sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_tree, &this_rdc,
                                mi_row, mi_col, qp_delta);

    // av1_reset_mbmi()/av1_restore_sb_state() clobber this SB's current
    // qindex, so save it and re-apply it afterwards.
    const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
    const int saved_qindex =
        cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

    av1_reset_mbmi(&cm->mi_params, bsize, mi_row, mi_col);
    av1_restore_sb_state(sb_org_stats, cpi, td, tile_data, mi_row, mi_col);
    cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = saved_qindex;

    td->pc_root = av1_alloc_pc_tree_node(bsize);
    if (!td->pc_root)
      aom_internal_error(x->e_mbd.error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                          &this_rdc, this_rdc, td->pc_root, sms_tree, NULL,
                          SB_DRY_PASS, NULL);

    // Accept a strictly lower rd cost; on an exact tie, prefer the offset
    // closest to the original rdmult qindex.
    const int strictly_better = best_rdc.rdcost > this_rdc.rdcost;
    const int tie_with_smaller_delta =
        best_rdc.rdcost == this_rdc.rdcost &&
        abs(qp_delta) < abs(best_qindex - x->rdmult_delta_qindex);
    if (strictly_better || tie_with_smaller_delta) {
      best_rdc = this_rdc;
      best_qindex = x->rdmult_delta_qindex + qp_delta;
    }
  }

  return best_qindex;
}
760 #endif  //! CONFIG_REALTIME_ONLY
761 
/*!\brief Encode a superblock (RD-search-based)
 *
 * \ingroup partition_search
 * Conducts partition search for a superblock, based on rate-distortion costs,
 * from scratch or adjusting from a pre-calculated partition pattern.
 */
static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                    TileDataEnc *tile_data, TokenExtra **tp,
                                    const int mi_row, const int mi_col,
                                    const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  int dummy_rate;
  int64_t dummy_dist;
  RD_STATS dummy_rdc;
  SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;

#if CONFIG_REALTIME_ONLY
  (void)seg_skip;
#endif  // CONFIG_REALTIME_ONLY

  init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                    1);

  // Encode the superblock
  if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // partition search starting from a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_use_partition_time);
#endif
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_use_partition_time);
#endif
  }
#if !CONFIG_REALTIME_ONLY
  else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // partition search by adjusting a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    // A seg-skip SB is coded as one whole-SB block; otherwise use the
    // configured fixed partition size.
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
    td->pc_root = av1_alloc_pc_tree_node(sb_size);
    if (!td->pc_root)
      aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                         "Failed to allocate PC_TREE");
    av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                         &dummy_rate, &dummy_dist, 1, td->pc_root);
    av1_free_pc_tree_recursive(td->pc_root, num_planes, 0, 0,
                               sf->part_sf.partition_search_type);
    td->pc_root = NULL;
  } else {
    // The most exhaustive recursive partition search
    SuperBlockEnc *sb_enc = &x->sb_enc;
    // No stats for overlay frames. Exclude key frame.
    av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);

    // Reset the tree for simple motion search data
    av1_reset_simple_motion_tree_partition(sms_root, sb_size);

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, rd_pick_partition_time);
#endif

    // Estimate the maximum square partition block size, which will be used
    // as the starting block size for partitioning the sb
    set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);

    // The superblock can be searched only once, or twice consecutively for
    // better quality. Note that the meaning of passes here is different from
    // the general concept of 1-pass/2-pass encoders.
    const int num_passes =
        cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;

    // Optional per-SB delta-qp sweep: find the best SB-level qindex before
    // the final partition search. Requires delta-q signaling and is skipped
    // for the zero-lag realtime no-stats configuration.
    if (cpi->oxcf.sb_qp_sweep &&
        !(has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
          cpi->oxcf.gf_cfg.lag_in_frames == 0) &&
        cm->delta_q_info.delta_q_present_flag) {
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_stats_cache,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_stats_cache)));
      // Snapshot SB state so each sweep iteration (and the final restore)
      // starts from identical conditions.
      av1_backup_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                          mi_col);
      assert(x->rdmult_delta_qindex == x->delta_qindex);

      const int best_qp_diff =
          sb_qp_sweep(cpi, td, tile_data, tp, mi_row, mi_col, sb_size, sms_root,
                      td->mb.sb_stats_cache) -
          x->rdmult_delta_qindex;

      // Re-initialize the quantizers with the winning delta-qp before the
      // real encoding pass below.
      sb_qp_sweep_init_quantizers(cpi, td, tile_data, sms_root, &dummy_rdc,
                                  mi_row, mi_col, best_qp_diff);

      // Restore the pre-sweep SB state, preserving the chosen qindex.
      const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
      const int backup_current_qindex =
          cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;

      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_restore_sb_state(td->mb.sb_stats_cache, cpi, td, tile_data, mi_row,
                           mi_col);

      cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
          backup_current_qindex;
      aom_free(td->mb.sb_stats_cache);
      td->mb.sb_stats_cache = NULL;
    }
    if (num_passes == 1) {
#if CONFIG_PARTITION_SEARCH_ORDER
      // External partition model (when ready) drives the search order for
      // inter frames; otherwise fall through to the default recursive search.
      if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
        av1_reset_part_sf(&cpi->sf.part_sf);
        av1_reset_sf_for_ext_part(cpi);
        RD_STATS this_rdc;
        av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
                                mi_col, sb_size, &this_rdc);
      } else {
        td->pc_root = av1_alloc_pc_tree_node(sb_size);
        if (!td->pc_root)
          aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                             "Failed to allocate PC_TREE");
        av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                              &dummy_rdc, dummy_rdc, td->pc_root, sms_root,
                              NULL, SB_SINGLE_PASS, NULL);
      }
#else
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_SINGLE_PASS, NULL);
#endif  // CONFIG_PARTITION_SEARCH_ORDER
    } else {
      // First pass
      AOM_CHECK_MEM_ERROR(
          x->e_mbd.error_info, td->mb.sb_fp_stats,
          (SB_FIRST_PASS_STATS *)aom_malloc(sizeof(*td->mb.sb_fp_stats)));
      av1_backup_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                          mi_col);
      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_DRY_PASS, NULL);

      // Second pass
      init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                        mi_col, 0);
      av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
      av1_reset_simple_motion_tree_partition(sms_root, sb_size);

      av1_restore_sb_state(td->mb.sb_fp_stats, cpi, td, tile_data, mi_row,
                           mi_col);

      td->pc_root = av1_alloc_pc_tree_node(sb_size);
      if (!td->pc_root)
        aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate PC_TREE");
      av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
                            &dummy_rdc, dummy_rdc, td->pc_root, sms_root, NULL,
                            SB_WET_PASS, NULL);
      aom_free(td->mb.sb_fp_stats);
      td->mb.sb_fp_stats = NULL;
    }

    // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
    sb_enc->tpl_data_count = 0;
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, rd_pick_partition_time);
#endif
  }
#endif  // !CONFIG_REALTIME_ONLY

  // Update the inter rd model
  // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
      cm->tiles.cols == 1 && cm->tiles.rows == 1) {
    av1_inter_mode_data_fit(tile_data, x->rdmult);
  }
}
962 
963 // Check if the cost update of symbols mode, coeff and dv are tile or off.
is_mode_coeff_dv_upd_freq_tile_or_off(const AV1_COMP * const cpi)964 static AOM_INLINE int is_mode_coeff_dv_upd_freq_tile_or_off(
965     const AV1_COMP *const cpi) {
966   const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
967 
968   return (inter_sf->coeff_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
969           inter_sf->mode_cost_upd_level <= INTERNAL_COST_UPD_TILE &&
970           cpi->sf.intra_sf.dv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
971 }
972 
973 // When row-mt is enabled and cost update frequencies are set to off/tile,
974 // processing of current SB can start even before processing of top-right SB
975 // is finished. This function checks if it is sufficient to wait for top SB
976 // to finish processing before current SB starts processing.
delay_wait_for_top_right_sb(const AV1_COMP * const cpi)977 static AOM_INLINE int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
978   const MODE mode = cpi->oxcf.mode;
979   if (mode == GOOD) return 0;
980 
981   if (mode == ALLINTRA)
982     return is_mode_coeff_dv_upd_freq_tile_or_off(cpi);
983   else if (mode == REALTIME)
984     return (is_mode_coeff_dv_upd_freq_tile_or_off(cpi) &&
985             cpi->sf.inter_sf.mv_cost_upd_level <= INTERNAL_COST_UPD_TILE);
986   else
987     return 0;
988 }
989 
990 /*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
991  *
992  * \ingroup partition_search
993  * \callgraph
994  * \callergraph
995  */
get_sb_source_sad(const AV1_COMP * cpi,int mi_row,int mi_col)996 static AOM_INLINE uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
997                                              int mi_col) {
998   if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
999 
1000   const AV1_COMMON *const cm = &cpi->common;
1001   const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
1002                                    ? (cm->seq_params->mib_size >> 1)
1003                                    : cm->seq_params->mib_size;
1004   const int num_blk_64x64_cols =
1005       (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1006   const int num_blk_64x64_rows =
1007       (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
1008   const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
1009   const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
1010   uint64_t curr_sb_sad = UINT64_MAX;
1011   const uint64_t *const src_sad_blk_64x64_data =
1012       &cpi->src_sad_blk_64x64[blk_64x64_col_index +
1013                               blk_64x64_row_index * num_blk_64x64_cols];
1014   if (cm->seq_params->sb_size == BLOCK_128X128 &&
1015       blk_64x64_col_index + 1 < num_blk_64x64_cols &&
1016       blk_64x64_row_index + 1 < num_blk_64x64_rows) {
1017     // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
1018     // superblock
1019     curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
1020                   src_sad_blk_64x64_data[num_blk_64x64_cols] +
1021                   src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
1022   } else if (cm->seq_params->sb_size == BLOCK_64X64) {
1023     curr_sb_sad = src_sad_blk_64x64_data[0];
1024   }
1025   return curr_sb_sad;
1026 }
1027 
1028 /*!\brief Determine whether grading content can be skipped based on sad stat
1029  *
1030  * \ingroup partition_search
1031  * \callgraph
1032  * \callergraph
1033  */
is_calc_src_content_needed(AV1_COMP * cpi,MACROBLOCK * const x,int mi_row,int mi_col)1034 static AOM_INLINE bool is_calc_src_content_needed(AV1_COMP *cpi,
1035                                                   MACROBLOCK *const x,
1036                                                   int mi_row, int mi_col) {
1037   if (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
1038     return true;
1039   const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
1040   if (curr_sb_sad == UINT64_MAX) return true;
1041   if (curr_sb_sad == 0) {
1042     x->content_state_sb.source_sad_nonrd = kZeroSad;
1043     return false;
1044   }
1045   AV1_COMMON *const cm = &cpi->common;
1046   bool do_calc_src_content = true;
1047 
1048   if (cpi->oxcf.speed < 9) return do_calc_src_content;
1049 
1050   // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
1051   if (AOMMIN(cm->width, cm->height) < 360) {
1052     // Derive Average 64x64 block source SAD from SB source SAD
1053     const uint64_t avg_64x64_blk_sad =
1054         (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
1055                                                    : curr_sb_sad;
1056 
1057     // The threshold is determined based on kLowSad and kHighSad threshold and
1058     // test results.
1059     uint64_t thresh_low = 15000;
1060     uint64_t thresh_high = 40000;
1061 
1062     if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1063       thresh_low = thresh_low << 1;
1064       thresh_high = thresh_high << 1;
1065     }
1066 
1067     if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
1068       do_calc_src_content = false;
1069       // Note: set x->content_state_sb.source_sad_rd as well if this is extended
1070       // to RTC rd path.
1071       x->content_state_sb.source_sad_nonrd = kMedSad;
1072     }
1073   }
1074 
1075   return do_calc_src_content;
1076 }
1077 
1078 /*!\brief Determine whether grading content is needed based on sf and frame stat
1079  *
1080  * \ingroup partition_search
1081  * \callgraph
1082  * \callergraph
1083  */
1084 // TODO(any): consolidate sfs to make interface cleaner
grade_source_content_sb(AV1_COMP * cpi,MACROBLOCK * const x,TileDataEnc * tile_data,int mi_row,int mi_col)1085 static AOM_INLINE void grade_source_content_sb(AV1_COMP *cpi,
1086                                                MACROBLOCK *const x,
1087                                                TileDataEnc *tile_data,
1088                                                int mi_row, int mi_col) {
1089   AV1_COMMON *const cm = &cpi->common;
1090   if (cm->current_frame.frame_type == KEY_FRAME ||
1091       (cpi->ppi->use_svc &&
1092        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
1093     assert(x->content_state_sb.source_sad_nonrd == kMedSad);
1094     assert(x->content_state_sb.source_sad_rd == kMedSad);
1095     return;
1096   }
1097   bool calc_src_content = false;
1098 
1099   if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
1100     if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
1101       calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
1102     } else {
1103       x->content_state_sb.source_sad_nonrd = kZeroSad;
1104     }
1105   } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
1106              (cm->width * cm->height <= 352 * 288)) {
1107     if (cpi->rc.frame_source_sad > 0)
1108       calc_src_content = true;
1109     else
1110       x->content_state_sb.source_sad_rd = kZeroSad;
1111   }
1112   if (calc_src_content)
1113     av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
1114 }
1115 
/*!\brief Encode a superblock row by breaking it into superblocks
 *
 * \ingroup partition_search
 * \callgraph
 * \callergraph
 * Do partition and mode search for an sb row: one row of superblocks filling up
 * the width of the current tile.
 */
static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                     TileDataEnc *tile_data, int mi_row,
                                     TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  // SB-row index within this tile, used for row-mt synchronization.
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset delta for quantizer and loop filters at the beginning of every tile
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime/allintra mode and when frequency of cost updates is off/tile,
    // wait for the top superblock to finish encoding. Otherwise, wait for the
    // top-right superblock to finish encoding.
    enc_row_mt->sync_read_ptr(
        row_mt_sync, sb_row, sb_col_in_tile - delay_wait_for_top_right_sb(cpi));

#if CONFIG_MULTITHREAD
    if (row_mt_enabled) {
      pthread_mutex_lock(enc_row_mt->mutex_);
      const bool row_mt_exit = enc_row_mt->row_mt_exit;
      pthread_mutex_unlock(enc_row_mt->mutex_);
      // Exit in case any worker has encountered an error.
      if (row_mt_exit) return;
    }
#endif

    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // update context
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        // Average with the top-right context when it exists; otherwise with
        // the top context (last SB column of the row above).
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    av1_zero(x->color_sensitivity_sb);
    av1_zero(x->color_sensitivity_sb_g);
    av1_zero(x->color_sensitivity_sb_alt);
    av1_zero(x->color_sensitivity);
    x->content_state_sb.source_sad_nonrd = kMedSad;
    x->content_state_sb.source_sad_rd = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;
    x->force_zeromv_skip_for_sb = 0;
    x->sb_me_block = 0;
    x->sb_me_partition = 0;
    x->sb_me_mv.as_int = 0;
    x->sb_force_fixed_part = 1;

    if (cpi->oxcf.mode == ALLINTRA) {
      // 128 is the neutral (1.0x) rdmult modifier for allintra SBs.
      x->intra_sb_rdmult_modifier = 128;
    }

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const uint8_t segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
                                        sb_size);

    // Grade the temporal variation of the sb, the grade will be used to decide
    // fast mode search strategy for coding blocks
    if (!seg_skip) grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
      else if (sb_col_in_tile >= 1)
        memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
               sizeof(*xd->tile_ctx));
    }
    // Signal completion of this SB so dependent row-mt workers can proceed.
    enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
                               sb_cols_in_tile);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
1267 
init_encode_frame_mb_context(AV1_COMP * cpi)1268 static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
1269   AV1_COMMON *const cm = &cpi->common;
1270   const int num_planes = av1_num_planes(cm);
1271   MACROBLOCK *const x = &cpi->td.mb;
1272   MACROBLOCKD *const xd = &x->e_mbd;
1273 
1274   // Copy data over into macro block data structures.
1275   av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
1276                        cm->seq_params->sb_size);
1277 
1278   av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
1279                          cm->seq_params->subsampling_y, num_planes);
1280 }
1281 
// (Re)allocates the per-tile encoder data array for the current tile
// configuration and resets row-mt bookkeeping. On allocation failure,
// CHECK_MEM_ERROR reports the error via the error handler.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &cpi->mt_info.enc_row_mt;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  av1_row_mt_mem_dealloc(cpi);

  aom_free(cpi->tile_data);
  // Null the stale pointer immediately so that if the allocation below fails
  // (CHECK_MEM_ERROR reports and unwinds), cpi->tile_data is not left
  // dangling for teardown code to double-free.
  cpi->tile_data = NULL;
  cpi->allocated_tiles = 0;
  enc_row_mt->allocated_tile_cols = 0;
  enc_row_mt->allocated_tile_rows = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = tile_cols * tile_rows;
  enc_row_mt->allocated_tile_cols = tile_cols;
  enc_row_mt->allocated_tile_rows = tile_rows;
  // Zero the row-mt sync state and row contexts of every tile.
  for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
      const int tile_index = tile_row * tile_cols + tile_col;
      TileDataEnc *const this_tile = &cpi->tile_data[tile_index];
      av1_zero(this_tile->row_mt_sync);
      this_tile->row_ctx = NULL;
    }
  }
}
1311 
// Initializes per-tile encoder state for the current frame: tile geometry,
// CDF update policy, the per-tile entropy context, and (for screen content)
// the partitioning of the shared palette-token buffer across tiles.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;
  TokenInfo *const token_info = &cpi->token_info;
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
  TokenList *tplist = token_info->tplist[0][0];
  unsigned int tile_tok = 0;
  int tplist_count = 0;

  // Token buffers are only needed when screen content tools are allowed
  // (palette tokens) and not during stats generation.
  if (!is_stat_generation_stage(cpi) &&
      cm->features.allow_screen_content_tools) {
    // Number of tokens for which token info needs to be allocated.
    unsigned int tokens_required =
        get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
                        MAX_SB_SIZE_LOG2, num_planes);
    // Allocate/reallocate memory for token related info if the number of tokens
    // required is more than the number of tokens already allocated. This could
    // occur in case of the following:
    // 1) If the memory is not yet allocated
    // 2) If the frame dimensions have changed
    const bool realloc_tokens = tokens_required > token_info->tokens_allocated;
    if (realloc_tokens) {
      free_token_info(token_info);
      alloc_token_info(cm, token_info, tokens_required);
      pre_tok = token_info->tile_tok[0][0];
      tplist = token_info->tplist[0][0];
    }
  }

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      if (is_token_info_allocated(token_info)) {
        // Carve this tile's slice out of the shared token buffer: each tile's
        // pointer starts where the previous tile's allocation ended.
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
        pre_tok = token_info->tile_tok[tile_row][tile_col];
        tile_tok = allocated_tokens(
            tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
        tplist = token_info->tplist[tile_row][tile_col];
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_info);
      }
      // CDF updates are disabled for large-scale tiles, when the stream
      // disables them, or when row-mt delays the top-right wait (the context
      // would then be read before the top-right SB finished updating it).
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
                                    !cm->features.disable_cdf_update &&
                                    !delay_wait_for_top_right_sb(cpi);
      // Seed the tile's entropy context from the frame context.
      tile_data->tctx = *cm->fc;
    }
  }
}
1371 
1372 // Populate the start palette token info prior to encoding an SB row.
get_token_start(AV1_COMP * cpi,const TileInfo * tile_info,int tile_row,int tile_col,int mi_row,TokenExtra ** tp)1373 static AOM_INLINE void get_token_start(AV1_COMP *cpi, const TileInfo *tile_info,
1374                                        int tile_row, int tile_col, int mi_row,
1375                                        TokenExtra **tp) {
1376   const TokenInfo *token_info = &cpi->token_info;
1377   if (!is_token_info_allocated(token_info)) return;
1378 
1379   const AV1_COMMON *cm = &cpi->common;
1380   const int num_planes = av1_num_planes(cm);
1381   TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
1382   const int sb_row_in_tile =
1383       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1384 
1385   get_start_tok(cpi, tile_row, tile_col, mi_row, tp,
1386                 cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
1387   assert(tplist != NULL);
1388   tplist[sb_row_in_tile].start = *tp;
1389 }
1390 
1391 // Populate the token count after encoding an SB row.
populate_token_count(AV1_COMP * cpi,const TileInfo * tile_info,int tile_row,int tile_col,int mi_row,TokenExtra * tok)1392 static AOM_INLINE void populate_token_count(AV1_COMP *cpi,
1393                                             const TileInfo *tile_info,
1394                                             int tile_row, int tile_col,
1395                                             int mi_row, TokenExtra *tok) {
1396   const TokenInfo *token_info = &cpi->token_info;
1397   if (!is_token_info_allocated(token_info)) return;
1398 
1399   const AV1_COMMON *cm = &cpi->common;
1400   const int num_planes = av1_num_planes(cm);
1401   TokenList *const tplist = token_info->tplist[tile_row][tile_col];
1402   const int sb_row_in_tile =
1403       (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
1404   const int tile_mb_cols =
1405       (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
1406   const int num_mb_rows_in_sb =
1407       ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
1408   tplist[sb_row_in_tile].count =
1409       (unsigned int)(tok - tplist[sb_row_in_tile].start);
1410 
1411   assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
1412          get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
1413                          cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
1414                          num_planes));
1415 
1416   (void)num_planes;
1417   (void)tile_mb_cols;
1418   (void)num_mb_rows_in_sb;
1419 }
1420 
1421 /*!\brief Encode a superblock row
1422  *
1423  * \ingroup partition_search
1424  */
av1_encode_sb_row(AV1_COMP * cpi,ThreadData * td,int tile_row,int tile_col,int mi_row)1425 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
1426                        int tile_col, int mi_row) {
1427   AV1_COMMON *const cm = &cpi->common;
1428   const int tile_cols = cm->tiles.cols;
1429   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
1430   const TileInfo *const tile_info = &this_tile->tile_info;
1431   TokenExtra *tok = NULL;
1432 
1433   get_token_start(cpi, tile_info, tile_row, tile_col, mi_row, &tok);
1434 
1435   encode_sb_row(cpi, td, this_tile, mi_row, &tok);
1436 
1437   populate_token_count(cpi, tile_info, tile_row, tile_col, mi_row, tok);
1438 }
1439 
1440 /*!\brief Encode a tile
1441  *
1442  * \ingroup partition_search
1443  */
av1_encode_tile(AV1_COMP * cpi,ThreadData * td,int tile_row,int tile_col)1444 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
1445                      int tile_col) {
1446   AV1_COMMON *const cm = &cpi->common;
1447   TileDataEnc *const this_tile =
1448       &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1449   const TileInfo *const tile_info = &this_tile->tile_info;
1450 
1451   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
1452 
1453   av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
1454                          tile_info->mi_col_end, tile_row);
1455   av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
1456                          &td->mb.e_mbd);
1457 
1458   if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
1459     cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
1460 
1461   if (td->mb.txfm_search_info.mb_rd_record != NULL) {
1462     av1_crc32c_calculator_init(
1463         &td->mb.txfm_search_info.mb_rd_record->crc_calculator);
1464   }
1465 
1466   for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
1467        mi_row += cm->seq_params->mib_size) {
1468     av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
1469   }
1470   this_tile->abs_sum_level = td->abs_sum_level;
1471 }
1472 
1473 /*!\brief Break one frame into tiles and encode the tiles
1474  *
1475  * \ingroup partition_search
1476  *
1477  * \param[in]    cpi    Top-level encoder structure
1478  */
encode_tiles(AV1_COMP * cpi)1479 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
1480   AV1_COMMON *const cm = &cpi->common;
1481   const int tile_cols = cm->tiles.cols;
1482   const int tile_rows = cm->tiles.rows;
1483   int tile_col, tile_row;
1484 
1485   MACROBLOCK *const mb = &cpi->td.mb;
1486   assert(IMPLIES(cpi->tile_data == NULL,
1487                  cpi->allocated_tiles < tile_cols * tile_rows));
1488   if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1489 
1490   av1_init_tile_data(cpi);
1491   av1_alloc_mb_data(cpi, mb);
1492 
1493   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1494     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1495       TileDataEnc *const this_tile =
1496           &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1497       cpi->td.intrabc_used = 0;
1498       cpi->td.deltaq_used = 0;
1499       cpi->td.abs_sum_level = 0;
1500       cpi->td.rd_counts.seg_tmp_pred_cost[0] = 0;
1501       cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
1502       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1503       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1504       av1_init_rtc_counters(&cpi->td.mb);
1505       cpi->td.mb.palette_pixels = 0;
1506       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1507       if (!frame_is_intra_only(&cpi->common))
1508         av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
1509       cpi->palette_pixel_num += cpi->td.mb.palette_pixels;
1510       cpi->intrabc_used |= cpi->td.intrabc_used;
1511       cpi->deltaq_used |= cpi->td.deltaq_used;
1512     }
1513   }
1514 
1515   av1_dealloc_mb_data(mb, av1_num_planes(cm));
1516 }
1517 
1518 // Set the relative distance of a reference frame w.r.t. current frame
set_rel_frame_dist(const AV1_COMMON * const cm,RefFrameDistanceInfo * const ref_frame_dist_info,const int ref_frame_flags)1519 static AOM_INLINE void set_rel_frame_dist(
1520     const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1521     const int ref_frame_flags) {
1522   MV_REFERENCE_FRAME ref_frame;
1523   int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1524   ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1525   ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1526   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1527     ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1528     if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1529       int dist = av1_encoder_get_relative_dist(
1530           cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1531           cm->current_frame.display_order_hint);
1532       ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1533       // Get the nearest ref_frame in the past
1534       if (abs(dist) < min_past_dist && dist < 0) {
1535         ref_frame_dist_info->nearest_past_ref = ref_frame;
1536         min_past_dist = abs(dist);
1537       }
1538       // Get the nearest ref_frame in the future
1539       if (dist < min_future_dist && dist > 0) {
1540         ref_frame_dist_info->nearest_future_ref = ref_frame;
1541         min_future_dist = dist;
1542       }
1543     }
1544   }
1545 }
1546 
refs_are_one_sided(const AV1_COMMON * cm)1547 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
1548   assert(!frame_is_intra_only(cm));
1549 
1550   int one_sided_refs = 1;
1551   const int cur_display_order_hint = cm->current_frame.display_order_hint;
1552   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1553     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1554     if (buf == NULL) continue;
1555     if (av1_encoder_get_relative_dist(buf->display_order_hint,
1556                                       cur_display_order_hint) > 0) {
1557       one_sided_refs = 0;  // bwd reference
1558       break;
1559     }
1560   }
1561   return one_sided_refs;
1562 }
1563 
get_skip_mode_ref_offsets(const AV1_COMMON * cm,int ref_order_hint[2])1564 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1565                                              int ref_order_hint[2]) {
1566   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1567   ref_order_hint[0] = ref_order_hint[1] = 0;
1568   if (!skip_mode_info->skip_mode_allowed) return;
1569 
1570   const RefCntBuffer *const buf_0 =
1571       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1572   const RefCntBuffer *const buf_1 =
1573       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1574   assert(buf_0 != NULL && buf_1 != NULL);
1575 
1576   ref_order_hint[0] = buf_0->order_hint;
1577   ref_order_hint[1] = buf_1->order_hint;
1578 }
1579 
// Decide whether skip mode may be used for the current frame. Returns 1 if
// skip mode survives the encoder-side restrictions below, 0 otherwise.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  // NOTE(review): only the second distance is wrapped in abs() before the
  // difference below -- presumably ref0's distance is already known to be
  // non-negative here; confirm against av1_setup_skip_mode_allowed().
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Both references of the skip-mode pair must be enabled for this frame,
  // otherwise skip mode cannot reference them.
  const int ref_frame[2] = {
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
  };
  if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[0]]) ||
      !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame[1]]))
    return 0;

  return 1;
}
1610 
set_default_interp_skip_flags(const AV1_COMMON * cm,InterpSearchFlags * interp_search_flags)1611 static AOM_INLINE void set_default_interp_skip_flags(
1612     const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1613   const int num_planes = av1_num_planes(cm);
1614   interp_search_flags->default_interp_skip_flags =
1615       (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1616                         : INTERP_SKIP_LUMA_SKIP_CHROMA;
1617 }
1618 
// Compute cpi->prune_ref_frame_mask: a bitmask over compound reference pair
// indices (REF_FRAMES .. MODE_CTX_REF_FRAMES-1) that should be skipped at
// frame level. Singles (indices below REF_FRAMES) are never pruned here.
static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    // Signed display-order distances of ALTREF2 and BWDREF from the current
    // frame (positive => future reference). Used by the >= 4 check below.
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    // Examine every compound reference pair.
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      // Skip pairs whose references are not both enabled for this frame.
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}
1675 
// Decide whether objective delta-q should be enabled for this frame by
// running a per-superblock trial and summing the RD-cost change of applying
// delta-q. Returns 1 when the aggregate RD-cost delta is negative (delta-q
// is expected to help), 0 otherwise. In realtime-only builds the trial is
// compiled out and delta-q is always allowed.
static int allow_deltaq_mode(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
  AV1_COMMON *const cm = &cpi->common;
  BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  int sbs_wide = mi_size_wide[sb_size];
  int sbs_high = mi_size_high[sb_size];

  // Accumulate the RD-cost delta over all superblocks of the frame.
  int64_t delta_rdcost = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += sbs_high) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += sbs_wide) {
      int64_t this_delta_rdcost = 0;
      av1_get_q_for_deltaq_objective(cpi, &cpi->td, &this_delta_rdcost, sb_size,
                                     mi_row, mi_col);
      delta_rdcost += this_delta_rdcost;
    }
  }
  return delta_rdcost < 0;
#else
  (void)cpi;
  return 1;
#endif  // !CONFIG_REALTIME_ONLY
}
1698 
1699 #define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
1700 #define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
1701 
1702 // Populates block level thresholds for force zeromv-skip decision
populate_thresh_to_force_zeromv_skip(AV1_COMP * cpi)1703 static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
1704   if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
1705 
1706   // Threshold for forcing zeromv-skip decision is as below:
1707   // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
1708   // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
1709   // allowing slightly higher error for smaller blocks.
1710   // Per Pixel Threshold of 64x64 block        Area of 64x64 block         1  1
1711   // ------------------------------------=sqrt(---------------------)=sqrt(-)=-
1712   // Per Pixel Threshold of 128x128 block      Area of 128x128 block       4  2
1713   // Thus, per pixel thresholds for blocks of size 32x32, 16x16,...  can be
1714   // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
1715   // small blocks, the same is clipped to 4.
1716   const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
1717   const int num_128x128_pix =
1718       block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
1719 
1720   for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
1721     const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
1722 
1723     // Calculate the threshold for zeromv-skip decision based on area of the
1724     // partition
1725     unsigned int thresh_exit_part_blk =
1726         (unsigned int)(thresh_exit_128x128_part *
1727                            sqrt((double)num_block_pix / num_128x128_pix) +
1728                        0.5);
1729     thresh_exit_part_blk = AOMMIN(
1730         thresh_exit_part_blk,
1731         (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
1732     cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
1733   }
1734 }
1735 
// Free both ping-pong sets of block-hash buffers. Entries may be NULL when
// allocation failed part-way (callers initialize the arrays to NULL), so
// each slot is released unconditionally.
static void free_block_hash_buffers(uint32_t *block_hash_values[2][2],
                                    int8_t *is_block_same[2][3]) {
  for (int k = 0; k < 2; ++k) {
    aom_free(block_hash_values[k][0]);
    aom_free(block_hash_values[k][1]);

    aom_free(is_block_same[k][0]);
    aom_free(is_block_same[k][1]);
    aom_free(is_block_same[k][2]);
  }
}
1748 
1749 /*!\brief Encoder setup(only for the current frame), encoding, and recontruction
1750  * for a single frame
1751  *
1752  * \ingroup high_level_algo
1753  */
encode_frame_internal(AV1_COMP * cpi)1754 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
1755   ThreadData *const td = &cpi->td;
1756   MACROBLOCK *const x = &td->mb;
1757   AV1_COMMON *const cm = &cpi->common;
1758   CommonModeInfoParams *const mi_params = &cm->mi_params;
1759   FeatureFlags *const features = &cm->features;
1760   MACROBLOCKD *const xd = &x->e_mbd;
1761   RD_COUNTS *const rdc = &cpi->td.rd_counts;
1762 #if CONFIG_FPMT_TEST
1763   FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
1764   FrameProbInfo *const temp_frame_probs_simulation =
1765       &cpi->ppi->temp_frame_probs_simulation;
1766 #endif
1767   FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1768   IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1769   MultiThreadInfo *const mt_info = &cpi->mt_info;
1770   AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1771   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1772   const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1773   int i;
1774 
1775   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1776     mi_params->setup_mi(mi_params);
1777   }
1778 
1779   set_mi_offsets(mi_params, xd, 0, 0);
1780 
1781   av1_zero(*td->counts);
1782   av1_zero(rdc->tx_type_used);
1783   av1_zero(rdc->obmc_used);
1784   av1_zero(rdc->warped_used);
1785   av1_zero(rdc->seg_tmp_pred_cost);
1786 
1787   // Reset the flag.
1788   cpi->intrabc_used = 0;
1789   // Need to disable intrabc when superres is selected
1790   if (av1_superres_scaled(cm)) {
1791     features->allow_intrabc = 0;
1792   }
1793 
1794   features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1795 
1796   if (features->allow_warped_motion &&
1797       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1798     const FRAME_UPDATE_TYPE update_type =
1799         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1800     int warped_probability =
1801 #if CONFIG_FPMT_TEST
1802         cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
1803             ? temp_frame_probs->warped_probs[update_type]
1804             :
1805 #endif  // CONFIG_FPMT_TEST
1806             frame_probs->warped_probs[update_type];
1807     if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
1808       features->allow_warped_motion = 0;
1809   }
1810 
1811   int hash_table_created = 0;
1812   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1813       !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1814     // TODO(any): move this outside of the recoding loop to avoid recalculating
1815     // the hash table.
1816     // add to hash table
1817     const int pic_width = cpi->source->y_crop_width;
1818     const int pic_height = cpi->source->y_crop_height;
1819     uint32_t *block_hash_values[2][2] = { { NULL } };
1820     int8_t *is_block_same[2][3] = { { NULL } };
1821     int k, j;
1822     bool error = false;
1823 
1824     for (k = 0; k < 2 && !error; ++k) {
1825       for (j = 0; j < 2; ++j) {
1826         block_hash_values[k][j] = (uint32_t *)aom_malloc(
1827             sizeof(*block_hash_values[0][0]) * pic_width * pic_height);
1828         if (!block_hash_values[k][j]) {
1829           error = true;
1830           break;
1831         }
1832       }
1833 
1834       for (j = 0; j < 3 && !error; ++j) {
1835         is_block_same[k][j] = (int8_t *)aom_malloc(
1836             sizeof(*is_block_same[0][0]) * pic_width * pic_height);
1837         if (!is_block_same[k][j]) error = true;
1838       }
1839     }
1840 
1841     av1_hash_table_init(intrabc_hash_info);
1842     if (error ||
1843         !av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
1844       free_block_hash_buffers(block_hash_values, is_block_same);
1845       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1846                          "Error allocating intrabc_hash_table and buffers");
1847     }
1848     hash_table_created = 1;
1849     av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1850                                       block_hash_values[0], is_block_same[0]);
1851     // Hash data generated for screen contents is used for intraBC ME
1852     const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1853     const int max_sb_size =
1854         (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1855     int src_idx = 0;
1856     for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1857       const int dst_idx = !src_idx;
1858       av1_generate_block_hash_value(
1859           intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1860           block_hash_values[dst_idx], is_block_same[src_idx],
1861           is_block_same[dst_idx]);
1862       if (size >= min_alloc_size) {
1863         if (!av1_add_to_hash_map_by_row_with_precal_data(
1864                 &intrabc_hash_info->intrabc_hash_table,
1865                 block_hash_values[dst_idx], is_block_same[dst_idx][2],
1866                 pic_width, pic_height, size)) {
1867           error = true;
1868           break;
1869         }
1870       }
1871     }
1872 
1873     free_block_hash_buffers(block_hash_values, is_block_same);
1874 
1875     if (error) {
1876       aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
1877                          "Error adding data to intrabc_hash_table");
1878     }
1879   }
1880 
1881   const CommonQuantParams *quant_params = &cm->quant_params;
1882   for (i = 0; i < MAX_SEGMENTS; ++i) {
1883     const int qindex =
1884         cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1885                         : quant_params->base_qindex;
1886     xd->lossless[i] =
1887         qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1888         quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1889         quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1890     if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1891     xd->qindex[i] = qindex;
1892     if (xd->lossless[i]) {
1893       cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1894     } else {
1895       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1896     }
1897   }
1898   features->coded_lossless = is_coded_lossless(cm, xd);
1899   features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1900 
1901   // Fix delta q resolution for the moment
1902 
1903   cm->delta_q_info.delta_q_res = 0;
1904   if (cpi->use_ducky_encode) {
1905     cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_DUCKY_ENCODE;
1906   } else if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1907     if (deltaq_mode == DELTA_Q_OBJECTIVE)
1908       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1909     else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1910       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1911     else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1912       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1913     else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1914       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1915     else if (deltaq_mode == DELTA_Q_HDR)
1916       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1917     // Set delta_q_present_flag before it is used for the first time
1918     cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1919     cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1920 
1921     // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1922     // is used for ineligible frames. That effectively will turn off row_mt
1923     // usage. Note objective delta_q and tpl eligible frames are only altref
1924     // frames currently.
1925     const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1926     if (cm->delta_q_info.delta_q_present_flag) {
1927       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1928           gf_group->update_type[cpi->gf_frame_index] == LF_UPDATE)
1929         cm->delta_q_info.delta_q_present_flag = 0;
1930 
1931       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1932           cm->delta_q_info.delta_q_present_flag) {
1933         cm->delta_q_info.delta_q_present_flag &= allow_deltaq_mode(cpi);
1934       }
1935     }
1936 
1937     // Reset delta_q_used flag
1938     cpi->deltaq_used = 0;
1939 
1940     cm->delta_q_info.delta_lf_present_flag =
1941         cm->delta_q_info.delta_q_present_flag &&
1942         oxcf->tool_cfg.enable_deltalf_mode;
1943     cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
1944 
1945     // update delta_q_present_flag and delta_lf_present_flag based on
1946     // base_qindex
1947     cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
1948     cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
1949   } else if (cpi->cyclic_refresh->apply_cyclic_refresh ||
1950              cpi->svc.number_temporal_layers == 1) {
1951     cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
1952     cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
1953   }
1954   cpi->rc.cnt_zeromv = 0;
1955 
1956   av1_frame_init_quantizer(cpi);
1957   init_encode_frame_mb_context(cpi);
1958   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
1959 
1960   if (cm->prev_frame && cm->prev_frame->seg.enabled)
1961     cm->last_frame_seg_map = cm->prev_frame->seg_map;
1962   else
1963     cm->last_frame_seg_map = NULL;
1964   if (features->allow_intrabc || features->coded_lossless) {
1965     av1_set_default_ref_deltas(cm->lf.ref_deltas);
1966     av1_set_default_mode_deltas(cm->lf.mode_deltas);
1967   } else if (cm->prev_frame) {
1968     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
1969     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
1970   }
1971   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
1972   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
1973 
1974   cpi->all_one_sided_refs =
1975       frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
1976 
1977   cpi->prune_ref_frame_mask = 0;
1978   // Figure out which ref frames can be skipped at frame level.
1979   setup_prune_ref_frame_mask(cpi);
1980 
1981   x->txfm_search_info.txb_split_count = 0;
1982 #if CONFIG_SPEED_STATS
1983   x->txfm_search_info.tx_search_count = 0;
1984 #endif  // CONFIG_SPEED_STATS
1985 
1986 #if !CONFIG_REALTIME_ONLY
1987 #if CONFIG_COLLECT_COMPONENT_TIMING
1988   start_timing(cpi, av1_compute_global_motion_time);
1989 #endif
1990   av1_compute_global_motion_facade(cpi);
1991 #if CONFIG_COLLECT_COMPONENT_TIMING
1992   end_timing(cpi, av1_compute_global_motion_time);
1993 #endif
1994 #endif  // !CONFIG_REALTIME_ONLY
1995 
1996 #if CONFIG_COLLECT_COMPONENT_TIMING
1997   start_timing(cpi, av1_setup_motion_field_time);
1998 #endif
1999   av1_calculate_ref_frame_side(cm);
2000   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
2001 #if CONFIG_COLLECT_COMPONENT_TIMING
2002   end_timing(cpi, av1_setup_motion_field_time);
2003 #endif
2004 
2005   cm->current_frame.skip_mode_info.skip_mode_flag =
2006       check_skip_mode_enabled(cpi);
2007 
2008   // Initialization of skip mode cost depends on the value of
2009   // 'skip_mode_flag'. This initialization happens in the function
2010   // av1_fill_mode_rates(), which is in turn called in
2011   // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
2012   // has to be called after 'skip_mode_flag' is initialized.
2013   av1_initialize_rd_consts(cpi);
2014   av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
2015   populate_thresh_to_force_zeromv_skip(cpi);
2016 
2017   enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
2018   enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
2019   mt_info->row_mt_enabled = 0;
2020   mt_info->pack_bs_mt_enabled = AOMMIN(mt_info->num_mod_workers[MOD_PACK_BS],
2021                                        cm->tiles.cols * cm->tiles.rows) > 1;
2022 
2023   if (oxcf->row_mt && (mt_info->num_workers > 1)) {
2024     mt_info->row_mt_enabled = 1;
2025     enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
2026     enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
2027     av1_encode_tiles_row_mt(cpi);
2028   } else {
2029     if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
2030       av1_encode_tiles_mt(cpi);
2031     } else {
2032       // Preallocate the pc_tree for realtime coding to reduce the cost of
2033       // memory allocation.
2034       const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
2035       if (use_nonrd_mode) {
2036         td->pc_root = av1_alloc_pc_tree_node(cm->seq_params->sb_size);
2037         if (!td->pc_root)
2038           aom_internal_error(xd->error_info, AOM_CODEC_MEM_ERROR,
2039                              "Failed to allocate PC_TREE");
2040       } else {
2041         td->pc_root = NULL;
2042       }
2043 
2044       encode_tiles(cpi);
2045       av1_free_pc_tree_recursive(td->pc_root, av1_num_planes(cm), 0, 0,
2046                                  cpi->sf.part_sf.partition_search_type);
2047       td->pc_root = NULL;
2048     }
2049   }
2050 
2051   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
2052   if (features->allow_intrabc && !cpi->intrabc_used) {
2053     features->allow_intrabc = 0;
2054   }
2055   if (features->allow_intrabc) {
2056     cm->delta_q_info.delta_lf_present_flag = 0;
2057   }
2058 
2059   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
2060     cm->delta_q_info.delta_q_present_flag = 0;
2061   }
2062 
2063   // Set the transform size appropriately before bitstream creation
2064   const MODE_EVAL_TYPE eval_type =
2065       cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
2066           ? WINNER_MODE_EVAL
2067           : DEFAULT_EVAL;
2068   const TX_SIZE_SEARCH_METHOD tx_search_type =
2069       cpi->winner_mode_params.tx_size_search_methods[eval_type];
2070   assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
2071   features->tx_mode = select_tx_mode(cm, tx_search_type);
2072 
2073   // Retain the frame level probability update conditions for parallel frames.
2074   // These conditions will be consumed during postencode stage to update the
2075   // probability.
2076   if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2077     cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
2078         cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
2079     cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
2080         (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2081          cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
2082     cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
2083         (features->allow_warped_motion &&
2084          cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
2085     cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
2086         (cm->current_frame.frame_type != KEY_FRAME &&
2087          cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2088          features->interp_filter == SWITCHABLE);
2089   }
2090 
2091   if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
2092       ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
2093         INT_MAX) &&
2094        (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
2095     const FRAME_UPDATE_TYPE update_type =
2096         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2097     for (i = 0; i < TX_SIZES_ALL; i++) {
2098       int sum = 0;
2099       int j;
2100       int left = MAX_TX_TYPE_PROB;
2101 
2102       for (j = 0; j < TX_TYPES; j++)
2103         sum += cpi->td.rd_counts.tx_type_used[i][j];
2104 
2105       for (j = TX_TYPES - 1; j >= 0; j--) {
2106         int update_txtype_frameprobs = 1;
2107         const int new_prob =
2108             sum ? MAX_TX_TYPE_PROB * cpi->td.rd_counts.tx_type_used[i][j] / sum
2109                 : (j ? 0 : MAX_TX_TYPE_PROB);
2110 #if CONFIG_FPMT_TEST
2111         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2112           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2113               0) {
2114             int prob =
2115                 (temp_frame_probs_simulation->tx_type_probs[update_type][i][j] +
2116                  new_prob) >>
2117                 1;
2118             left -= prob;
2119             if (j == 0) prob += left;
2120             temp_frame_probs_simulation->tx_type_probs[update_type][i][j] =
2121                 prob;
2122             // Copy temp_frame_probs_simulation to temp_frame_probs
2123             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2124                  update_type_idx++) {
2125               temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
2126                   temp_frame_probs_simulation
2127                       ->tx_type_probs[update_type_idx][i][j];
2128             }
2129           }
2130           update_txtype_frameprobs = 0;
2131         }
2132 #endif  // CONFIG_FPMT_TEST
2133         // Track the frame probabilities of parallel encode frames to update
2134         // during postencode stage.
2135         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2136           update_txtype_frameprobs = 0;
2137           cpi->frame_new_probs[cpi->num_frame_recode]
2138               .tx_type_probs[update_type][i][j] = new_prob;
2139         }
2140         if (update_txtype_frameprobs) {
2141           int prob =
2142               (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
2143           left -= prob;
2144           if (j == 0) prob += left;
2145           frame_probs->tx_type_probs[update_type][i][j] = prob;
2146         }
2147       }
2148     }
2149   }
2150 
2151   if (cm->seg.enabled) {
2152     cm->seg.temporal_update = 1;
2153     if (rdc->seg_tmp_pred_cost[0] < rdc->seg_tmp_pred_cost[1])
2154       cm->seg.temporal_update = 0;
2155   }
2156 
2157   if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
2158       cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
2159     const FRAME_UPDATE_TYPE update_type =
2160         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2161 
2162     for (i = 0; i < BLOCK_SIZES_ALL; i++) {
2163       int sum = 0;
2164       int update_obmc_frameprobs = 1;
2165       for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
2166 
2167       const int new_prob =
2168           sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
2169 #if CONFIG_FPMT_TEST
2170       if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2171         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2172           temp_frame_probs_simulation->obmc_probs[update_type][i] =
2173               (temp_frame_probs_simulation->obmc_probs[update_type][i] +
2174                new_prob) >>
2175               1;
2176           // Copy temp_frame_probs_simulation to temp_frame_probs
2177           for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2178                update_type_idx++) {
2179             temp_frame_probs->obmc_probs[update_type_idx][i] =
2180                 temp_frame_probs_simulation->obmc_probs[update_type_idx][i];
2181           }
2182         }
2183         update_obmc_frameprobs = 0;
2184       }
2185 #endif  // CONFIG_FPMT_TEST
2186       // Track the frame probabilities of parallel encode frames to update
2187       // during postencode stage.
2188       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2189         update_obmc_frameprobs = 0;
2190         cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
2191             new_prob;
2192       }
2193       if (update_obmc_frameprobs) {
2194         frame_probs->obmc_probs[update_type][i] =
2195             (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
2196       }
2197     }
2198   }
2199 
2200   if (features->allow_warped_motion &&
2201       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
2202     const FRAME_UPDATE_TYPE update_type =
2203         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2204     int update_warp_frameprobs = 1;
2205     int sum = 0;
2206     for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
2207     const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
2208 #if CONFIG_FPMT_TEST
2209     if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2210       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
2211         temp_frame_probs_simulation->warped_probs[update_type] =
2212             (temp_frame_probs_simulation->warped_probs[update_type] +
2213              new_prob) >>
2214             1;
2215         // Copy temp_frame_probs_simulation to temp_frame_probs
2216         for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2217              update_type_idx++) {
2218           temp_frame_probs->warped_probs[update_type_idx] =
2219               temp_frame_probs_simulation->warped_probs[update_type_idx];
2220         }
2221       }
2222       update_warp_frameprobs = 0;
2223     }
2224 #endif  // CONFIG_FPMT_TEST
2225     // Track the frame probabilities of parallel encode frames to update
2226     // during postencode stage.
2227     if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2228       update_warp_frameprobs = 0;
2229       cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
2230           new_prob;
2231     }
2232     if (update_warp_frameprobs) {
2233       frame_probs->warped_probs[update_type] =
2234           (frame_probs->warped_probs[update_type] + new_prob) >> 1;
2235     }
2236   }
2237 
2238   if (cm->current_frame.frame_type != KEY_FRAME &&
2239       cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
2240       features->interp_filter == SWITCHABLE) {
2241     const FRAME_UPDATE_TYPE update_type =
2242         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
2243 
2244     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2245       int sum = 0;
2246       int j;
2247       int left = 1536;
2248 
2249       for (j = 0; j < SWITCHABLE_FILTERS; j++) {
2250         sum += cpi->td.counts->switchable_interp[i][j];
2251       }
2252 
2253       for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
2254         int update_interpfilter_frameprobs = 1;
2255         const int new_prob =
2256             sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
2257                 : (j ? 0 : 1536);
2258 #if CONFIG_FPMT_TEST
2259         if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
2260           if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
2261               0) {
2262             int prob = (temp_frame_probs_simulation
2263                             ->switchable_interp_probs[update_type][i][j] +
2264                         new_prob) >>
2265                        1;
2266             left -= prob;
2267             if (j == 0) prob += left;
2268             temp_frame_probs_simulation
2269                 ->switchable_interp_probs[update_type][i][j] = prob;
2270             // Copy temp_frame_probs_simulation to temp_frame_probs
2271             for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
2272                  update_type_idx++) {
2273               temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
2274                   temp_frame_probs_simulation
2275                       ->switchable_interp_probs[update_type_idx][i][j];
2276             }
2277           }
2278           update_interpfilter_frameprobs = 0;
2279         }
2280 #endif  // CONFIG_FPMT_TEST
2281         // Track the frame probabilities of parallel encode frames to update
2282         // during postencode stage.
2283         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
2284           update_interpfilter_frameprobs = 0;
2285           cpi->frame_new_probs[cpi->num_frame_recode]
2286               .switchable_interp_probs[update_type][i][j] = new_prob;
2287         }
2288         if (update_interpfilter_frameprobs) {
2289           int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
2290                       new_prob) >>
2291                      1;
2292           left -= prob;
2293           if (j == 0) prob += left;
2294           frame_probs->switchable_interp_probs[update_type][i][j] = prob;
2295         }
2296       }
2297     }
2298   }
2299   if (hash_table_created) {
2300     av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
2301   }
2302 }
2303 
/*!\brief Set up reference frame buffers and encode a frame
2305  *
2306  * \ingroup high_level_algo
2307  * \callgraph
2308  * \callergraph
2309  *
2310  * \param[in]    cpi    Top-level encoder structure
2311  */
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  FeatureFlags *const features = &cm->features;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  // Restrict ext-tx to the default reduced set when the configuration asks
  // for it, instead of searching the potential full set of 16 transforms.
  features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;

  // Clamp every entry of the encoder's segmentation map so that no
  // segment_id exceeds seg.last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int rows = cm->mi_params.mi_rows;
    const int cols = cm->mi_params.mi_cols;
    const int seg_id_limit = cm->seg.last_active_segid;
    uint8_t *const seg_map = cpi->enc_seg.map;
    for (int r = 0; r < rows; ++r) {
      uint8_t *const row = seg_map + r * cols;
      for (int c = 0; c < cols; ++c) {
        row[c] = AOMMIN(row[c], seg_id_limit);
      }
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
                         cm->cur_frame->ref_display_order_hint,
                         cm->current_frame.display_order_hint);
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
                     cpi->ref_frame_flags);
  av1_setup_frame_sign_bias(cm);

  // When global motion is enabled, every buffer used as either a source or a
  // reference frame should have an image pyramid allocated. Verify this in
  // debug builds so problems surface early.
#if !defined(NDEBUG) && !CONFIG_REALTIME_ONLY
  if (cpi->alloc_pyramid) {
    assert(cpi->source->y_pyramid);
    for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
      const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
      if (buf != NULL) assert(buf->buf.y_pyramid);
    }
  }
#endif  // !defined(NDEBUG) && !CONFIG_REALTIME_ONLY

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(av1_num_planes(cm));
#endif

  rdc->newmv_or_intra_blocks = 0;
  cpi->palette_pixel_num = 0;

  if (cpi->sf.hl_sf.frame_parameter_update ||
      cpi->sf.rt_sf.use_comp_ref_nonrd) {
    // Inter frames start from compound-selection mode; intra-only frames can
    // only ever use a single reference.
    current_frame->reference_mode = frame_is_intra_only(cm)
                                        ? SINGLE_REFERENCE
                                        : REFERENCE_MODE_SELECT;

    features->interp_filter =
        cm->tiles.large_scale ? EIGHTTAP_REGULAR : SWITCHABLE;

    features->switchable_motion_mode = is_switchable_motion_mode_allowed(
        features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    // Fall back to single reference when no block (the flag includes 4x4
    // blocks) actually selected a compound reference.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT &&
        rdc->compound_ref_used_flag == 0) {
      current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
      av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
    }

    // Re-check the skip mode status, as the reference mode may have been
    // changed just above.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    // If the search never split a transform block, TX_MODE_SELECT buys
    // nothing: signal the largest-only transform mode instead.
    if (!cm->tiles.large_scale && features->tx_mode == TX_MODE_SELECT &&
        cpi->td.mb.txfm_search_info.txb_split_count == 0) {
      features->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    // Needed when a real-time speed setting is changed on the fly from one
    // using compound prediction to one using a single reference.
    if (current_frame->reference_mode == REFERENCE_MODE_SELECT)
      current_frame->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }
}
2417