• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <float.h>
14 #include <math.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17 
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 #include "config/av1_rtcd.h"
21 
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/binary_codes_writer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/aom_timer.h"
26 
27 #if CONFIG_MISMATCH_DEBUG
28 #include "aom_util/debug_util.h"
29 #endif  // CONFIG_MISMATCH_DEBUG
30 
31 #include "av1/common/cfl.h"
32 #include "av1/common/common.h"
33 #include "av1/common/entropy.h"
34 #include "av1/common/entropymode.h"
35 #include "av1/common/idct.h"
36 #include "av1/common/mv.h"
37 #include "av1/common/mvref_common.h"
38 #include "av1/common/pred_common.h"
39 #include "av1/common/quant_common.h"
40 #include "av1/common/reconintra.h"
41 #include "av1/common/reconinter.h"
42 #include "av1/common/seg_common.h"
43 #include "av1/common/tile_common.h"
44 #include "av1/common/warped_motion.h"
45 
46 #include "av1/encoder/allintra_vis.h"
47 #include "av1/encoder/aq_complexity.h"
48 #include "av1/encoder/aq_cyclicrefresh.h"
49 #include "av1/encoder/aq_variance.h"
50 #include "av1/encoder/global_motion_facade.h"
51 #include "av1/encoder/encodeframe.h"
52 #include "av1/encoder/encodeframe_utils.h"
53 #include "av1/encoder/encodemb.h"
54 #include "av1/encoder/encodemv.h"
55 #include "av1/encoder/encodetxb.h"
56 #include "av1/encoder/ethread.h"
57 #include "av1/encoder/extend.h"
58 #include "av1/encoder/intra_mode_search_utils.h"
59 #include "av1/encoder/ml.h"
60 #include "av1/encoder/motion_search_facade.h"
61 #include "av1/encoder/partition_strategy.h"
62 #if !CONFIG_REALTIME_ONLY
63 #include "av1/encoder/partition_model_weights.h"
64 #endif
65 #include "av1/encoder/partition_search.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/segmentation.h"
70 #include "av1/encoder/tokenize.h"
71 #include "av1/encoder/tpl_model.h"
72 #include "av1/encoder/var_based_part.h"
73 
74 #if CONFIG_TUNE_VMAF
75 #include "av1/encoder/tune_vmaf.h"
76 #endif
77 
78 /*!\cond */
79 // This is used as a reference when computing the source variance for the
80 //  purposes of activity masking.
81 // Eventually this should be replaced by custom no-reference routines,
82 //  which will be faster.
83 const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
84   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
85   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
86   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
87   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
88   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
89   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
90   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
91   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
92   128, 128, 128, 128, 128, 128, 128, 128
93 };
94 
95 static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
96   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
97   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
98   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
99   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
100   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
102   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
103   128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
104   128, 128, 128, 128, 128, 128, 128, 128
105 };
106 
107 static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
108   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
109   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
110   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
111   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
112   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
113   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
114   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
115   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
116   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
117   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
118   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
119   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
120   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
121   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
122   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
123   128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
124 };
125 
126 static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
127   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
128   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
129   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
130   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
131   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
132   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
133   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
134   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
135   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
136   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
137   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
138   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
139   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
140   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
141   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
142   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
143   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
144   128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
145   128 * 16, 128 * 16
146 };
147 /*!\endcond */
148 
// Returns the per-pixel variance of the source block: the variance against
// the flat mid-gray reference AV1_VAR_OFFS, normalized by the pixel count
// of block size `bs` (via num_pels_log2_lookup).
unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sum_sq_err;
  const unsigned int raw_var = cpi->ppi->fn_ptr[bs].vf(
      ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sum_sq_err);
  return ROUND_POWER_OF_TWO(raw_var, num_pels_log2_lookup[bs]);
}
157 
// High-bitdepth version of av1_get_sby_perpixel_variance(). Selects the
// flat reference whose fill level matches the bit depth `bd` and returns
// the per-pixel variance of the source block against it.
unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  assert(bd == 8 || bd == 10 || bd == 12);
  // Pick the mid-gray table scaled for this bit depth.
  const uint16_t *flat_ref = AV1_HIGH_VAR_OFFS_8;
  if (bd == 10) {
    flat_ref = AV1_HIGH_VAR_OFFS_10;
  } else if (bd == 12) {
    flat_ref = AV1_HIGH_VAR_OFFS_12;
  }
  unsigned int sse;
  const unsigned int var = cpi->ppi->fn_ptr[bs].vf(
      ref->buf, ref->stride, CONVERT_TO_BYTEPTR(flat_ref), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
172 
// Points each plane of the macroblock's source (x->plane[i].src) at the
// correct offset inside `src` for the superblock at (mi_row, mi_col), and
// records `src` as the current frame buffer on the decoder-side context.
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  x->e_mbd.cur_buf = src;

  // Clamp to MAX_MB_PLANE so static analyzers can prove the array accesses
  // below are in bounds.
  const int planes = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < planes; ++plane) {
    const int is_uv = (plane != 0);
    setup_pred_plane(&x->plane[plane].src, bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, NULL,
                     x->e_mbd.plane[plane].subsampling_x,
                     x->e_mbd.plane[plane].subsampling_y);
  }
}
189 
190 #if !CONFIG_REALTIME_ONLY
/*!\brief Assigns different quantization parameters to each super
 * block based on its TPL weight.
 *
 * \ingroup tpl_modelling
 *
 * \param[in]     cpi         Top level encoder instance structure
 * \param[in,out] td          Thread data structure
 * \param[in,out] x           Macro block level data for this block.
 * \param[in]     tile_info   Tile information / identification
 * \param[in]     mi_row      Block row (in "MI_SIZE" units) index
 * \param[in]     mi_col      Block column (in "MI_SIZE" units) index
 * \param[in]     num_planes  Number of image planes (e.g. Y,U,V)
 *
 * \return No return value but updates macroblock and thread data
 * related to the q / q delta to be used.
 */
static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
                                     MACROBLOCK *const x,
                                     const TileInfo *const tile_info,
                                     int mi_row, int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  // Callers only invoke this when delta-q signaling is on for the frame.
  assert(delta_q_info->delta_q_present_flag);

  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  // Delta-q modulation based on variance
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int delta_q_res = delta_q_info->delta_q_res;
  // Start from the frame-level base qindex and let the selected delta-q
  // mode below replace it with a superblock-specific value.
  int current_qindex = cm->quant_params.base_qindex;
  if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL) {
    // Perceptual mode: derive q from a per-SB energy measure — either a
    // wavelet energy level or a log-variance level, selected at compile
    // time by DELTA_Q_PERCEPTUAL_MODULATION.
    if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
      const int block_wavelet_energy_level =
          av1_block_wavelet_energy_level(cpi, x, sb_size);
      x->sb_energy_level = block_wavelet_energy_level;
      current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
          cpi, block_wavelet_energy_level);
    } else {
      const int block_var_level = av1_log_block_var(cpi, x, sb_size);
      x->sb_energy_level = block_var_level;
      current_qindex =
          av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
    }
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_OBJECTIVE &&
             cpi->oxcf.algo_cfg.enable_tpl_model) {
    // Setup deltaq based on tpl stats
    current_qindex =
        av1_get_q_for_deltaq_objective(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL_AI) {
    current_qindex = av1_get_sbq_perceptual_ai(cpi, sb_size, mi_row, mi_col);
  } else if (cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_USER_RATING_BASED) {
    current_qindex = av1_get_sbq_user_rating_based(cpi, mi_row, mi_col);
  }

  MACROBLOCKD *const xd = &x->e_mbd;
  // Snap the chosen qindex to the signalable delta-q resolution relative
  // to the previously coded base qindex.
  current_qindex = av1_adjust_q_from_delta_q_res(
      delta_q_res, xd->current_base_qindex, current_qindex);

  x->delta_qindex = current_qindex - cm->quant_params.base_qindex;
  av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);

  // keep track of any non-zero delta-q used
  td->deltaq_used |= (x->delta_qindex != 0);

  if (cpi->oxcf.tool_cfg.enable_deltalf_mode) {
    // Derive a loop-filter delta from the q delta (delta_qindex / 4),
    // rounded to the delta-lf resolution and clamped to the legal range.
    const int delta_lf_res = delta_q_info->delta_lf_res;
    const int lfmask = ~(delta_lf_res - 1);
    const int delta_lf_from_base =
        ((x->delta_qindex / 4 + delta_lf_res / 2) & lfmask);
    const int8_t delta_lf =
        (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
    // Monochrome content has no U/V filter levels, hence the - 2.
    const int frame_lf_count =
        av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
    const int mib_size = cm->seq_params->mib_size;

    // pre-set the delta lf for loop filter. Note that this value is set
    // before mi is assigned for each block in current superblock
    for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
      for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
        const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
        mi_params->mi_alloc[grid_idx].delta_lf_from_base = delta_lf;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          mi_params->mi_alloc[grid_idx].delta_lf[lf_id] = delta_lf;
        }
      }
    }
  }
}
282 
// Ranks the inter reference frames for this superblock by their TPL
// prediction-error reduction and fills x->tpl_keep_ref_frame with flags for
// the references worth searching. Returns early (leaving the flags all
// zero) when TPL stats are unavailable or not applicable for this frame.
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
                                 int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  MACROBLOCK *x = &td->mb;
  const int frame_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;

  av1_zero(x->tpl_keep_ref_frame);

  // No usable TPL stats, frame not TPL-eligible, or an AQ mode is active:
  // skip the ranking entirely.
  if (!av1_tpl_stats_ready(tpl_data, frame_idx)) return;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;

  const int is_overlay =
      cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
  if (is_overlay) {
    // Overlay frames: mark every reference frame as kept.
    memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
    return;
  }

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
  const int step = 1 << block_mis_log2;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  // Accumulate, over every TPL block inside the superblock, the prediction
  // error of each block's best reference relative to LAST_FRAME. Column
  // coordinates are converted to the superres (source) domain because TPL
  // stats are stored at the upscaled width.
  const int mi_row_end =
      AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      AOMMIN(coded_to_superres_mi(mi_col + mi_size_wide[sb_size],
                                  cm->superres_scale_denominator),
             mi_cols_sr);
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      const TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
      // Find the winner ref frame idx for the current block
      int64_t best_inter_cost = this_stats->pred_error[0];
      int best_rf_idx = 0;
      for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
        if ((this_stats->pred_error[idx] < best_inter_cost) &&
            (this_stats->pred_error[idx] != 0)) {
          best_inter_cost = this_stats->pred_error[idx];
          best_rf_idx = idx;
        }
      }
      // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
      // LAST_FRAME.
      tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
                                    this_stats->pred_error[LAST_FRAME - 1];

      for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
        inter_cost[rf_idx] += tpl_pred_error[rf_idx];
    }
  }

  // Insertion-sort reference indices 1..INTER_REFS_PER_FRAME-1 by ascending
  // accumulated cost; rank_index[0] ends up as the best non-LAST reference.
  int rank_index[INTER_REFS_PER_FRAME - 1];
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    rank_index[idx] = idx + 1;
    for (int i = idx; i > 0; --i) {
      if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
        const int tmp = rank_index[i - 1];
        rank_index[i - 1] = rank_index[i];
        rank_index[i] = tmp;
      }
    }
  }

  // INTRA and LAST are always searched regardless of the ranking.
  x->tpl_keep_ref_frame[INTRA_FRAME] = 1;
  x->tpl_keep_ref_frame[LAST_FRAME] = 1;

  int cutoff_ref = 0;
  for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
    x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
    if (idx > 2) {
      if (!cutoff_ref) {
        // If the predictive coding gains are smaller than the previous more
        // relevant frame over certain amount, discard this frame and all the
        // frames afterwards.
        if (llabs(inter_cost[rank_index[idx]]) <
                llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
            inter_cost[rank_index[idx]] == 0)
          cutoff_ref = 1;
      }

      if (cutoff_ref) x->tpl_keep_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
    }
  }
}
383 
adjust_rdmult_tpl_model(AV1_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)384 static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
385                                                int mi_row, int mi_col) {
386   const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
387   const int orig_rdmult = cpi->rd.RDMULT;
388 
389   assert(IMPLIES(cpi->ppi->gf_group.size > 0,
390                  cpi->gf_frame_index < cpi->ppi->gf_group.size));
391   const int gf_group_index = cpi->gf_frame_index;
392   if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
393       cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
394       cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
395     const int dr =
396         av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
397     x->rdmult = dr;
398   }
399 }
400 #endif  // !CONFIG_REALTIME_ONLY
401 
#if CONFIG_RT_ML_PARTITIONING
// Get a prediction(stored in x->est_pred) for the whole superblock.
static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  MACROBLOCKD *xd = &x->e_mbd;

  // TODO(kyslov) Extend to 128x128
  assert(cm->seq_params->sb_size == BLOCK_64X64);

  av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);

  if (!is_key_frame) {
    // Inter frame: build a cheap 64x64 luma prediction from LAST_FRAME
    // using a zero MV and bilinear interpolation.
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), 1);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->bsize = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);

    // Redirect the luma destination so the predictor is written straight
    // into x->est_pred (64-sample stride).
    xd->plane[0].dst.buf = x->est_pred;
    xd->plane[0].dst.stride = 64;
    av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
  } else {
    // Key frame: fill the prediction with a flat mid-gray level.
#if CONFIG_AV1_HIGHBITDEPTH
    switch (xd->bd) {
      case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
      case 10:
        // NOTE(review): memset converts its fill value to unsigned char, so
        // 128 * 4 (= 512) actually fills every byte with 0 — not the 10-bit
        // mid-gray this appears to intend. Same for 128 * 16 below. Confirm
        // whether an element-wise fill (e.g. aom_memset16) was intended.
        memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
        break;
      case 12:
        memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
        break;
    }
#else
    memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
#endif  // CONFIG_AV1_HIGHBITDEPTH
  }
}
#endif  // CONFIG_RT_ML_PARTITIONING
451 
// Weights used when averaging CDFs from neighboring contexts.
#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1

/*!\brief Encode a superblock (minimal RD search involved)
 *
 * \ingroup partition_search
 * Encodes the superblock by a pre-determined partition pattern, only minor
 * rd-based searches are allowed to adjust the initial pattern. It is only used
 * by realtime encoding.
 */
static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
                                       TileDataEnc *tile_data, TokenExtra **tp,
                                       const int mi_row, const int mi_col,
                                       const int seg_skip) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                      get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;

  // Grade the temporal variation of the sb, the grade will be used to decide
  // fast mode search strategy for coding blocks
  if (sf->rt_sf.source_metrics_sb_nonrd &&
      cpi->svc.number_spatial_layers <= 1 &&
      cm->current_frame.frame_type != KEY_FRAME) {
    // mi units are 4 pixels wide, hence << 2 to get a pixel offset.
    int offset = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
    av1_source_content_sb(cpi, x, offset);
  }
#if CONFIG_RT_ML_PARTITIONING
  if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
    // ML-based path: estimate a prediction for the whole SB, then run the
    // non-RD ML partition search and return early.
    PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
    RD_STATS dummy_rdc;
    get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
    av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                             BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
    av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
    return;
  }
#endif
  // Set the partition
  if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
    // set a fixed-size partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    const BLOCK_SIZE bsize =
        seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
    av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
  } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    // set a variance-based partition
    av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
    av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
  }
  assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
         sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
  set_cb_offsets(td->mb.cb_offset, 0, 0);

  // Adjust and encode the superblock
  PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);

  // Initialize the flag to skip cdef to 1.
  if (sf->rt_sf.skip_cdef_sb) {
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = 1;
      }
    }
  }

  av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                          pc_root);

  if (sf->rt_sf.skip_cdef_sb) {
    // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
    // "blocks".
    // Propagate the decision made during the partition search (stored in
    // the first mi of the SB) to every 64x64 unit of the superblock.
    const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
    const int skip = mi[0]->skip_cdef_curr_sb;
    for (int r = 0; r < block64_in_sb; ++r) {
      for (int c = 0; c < block64_in_sb; ++c) {
        const int idx_in_sb =
            r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
        if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = skip;
      }
    }
  }
  av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
}
544 
// This function initializes the stats for encode_rd_sb: simple-motion-search
// MVs, reference-frame pruning flags, per-SB delta-q / TPL rdmult state, the
// transform/mode rd hash records, and the output rd_cost (set to invalid).
static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
                                     const TileDataEnc *tile_data,
                                     SIMPLE_MOTION_DATA_TREE *sms_root,
                                     RD_STATS *rd_cost, int mi_row, int mi_col,
                                     int gather_tpl_data) {
  const AV1_COMMON *cm = &cpi->common;
  const TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *x = &td->mb;

  // Only seed simple-motion-search MVs when at least one of the speed
  // features that consume them is enabled, and the frame is inter.
  const SPEED_FEATURES *sf = &cpi->sf;
  const int use_simple_motion_search =
      (sf->part_sf.simple_motion_search_split ||
       sf->part_sf.simple_motion_search_prune_rect ||
       sf->part_sf.simple_motion_search_early_term_none ||
       sf->part_sf.ml_early_term_after_part_split_level) &&
      !frame_is_intra_only(cm);
  if (use_simple_motion_search) {
    av1_init_simple_motion_search_mvs_for_sb(cpi, tile_info, x, sms_root,
                                             mi_row, mi_col);
  }

#if !CONFIG_REALTIME_ONLY
  if (has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
      cpi->oxcf.gf_cfg.lag_in_frames == 0) {
    // 1-pass realtime with no lookahead: no TPL/delta-q setup to do; the
    // casts only silence unused-parameter warnings on this path.
    (void)tile_info;
    (void)mi_row;
    (void)mi_col;
    (void)gather_tpl_data;
  } else {
    init_ref_frame_space(cpi, td, mi_row, mi_col);
    x->sb_energy_level = 0;
    x->part_search_info.cnn_output_valid = 0;
    if (gather_tpl_data) {
      if (cm->delta_q_info.delta_q_present_flag) {
        const int num_planes = av1_num_planes(cm);
        const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
        setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
        av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
      }
      if (cpi->oxcf.algo_cfg.enable_tpl_model) {
        adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
      }
    }
  }
#else
  (void)tile_info;
  (void)mi_row;
  (void)mi_col;
  (void)gather_tpl_data;
#endif

  // Reset hash state for transform/mode rd hash information
  reset_hash_records(&x->txfm_search_info, cpi->sf.tx_sf.use_inter_txb_hash);
  av1_zero(x->picked_ref_frames_mask);
  av1_invalid_rd_stats(rd_cost);
}
602 
603 /*!\brief Encode a superblock (RD-search-based)
604  *
605  * \ingroup partition_search
606  * Conducts partition search for a superblock, based on rate-distortion costs,
607  * from scratch or adjusting from a pre-calculated partition pattern.
608  */
encode_rd_sb(AV1_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,TokenExtra ** tp,const int mi_row,const int mi_col,const int seg_skip)609 static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
610                                     TileDataEnc *tile_data, TokenExtra **tp,
611                                     const int mi_row, const int mi_col,
612                                     const int seg_skip) {
613   AV1_COMMON *const cm = &cpi->common;
614   MACROBLOCK *const x = &td->mb;
615   const SPEED_FEATURES *const sf = &cpi->sf;
616   const TileInfo *const tile_info = &tile_data->tile_info;
617   MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
618                       get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
619   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
620   const int num_planes = av1_num_planes(cm);
621   int dummy_rate;
622   int64_t dummy_dist;
623   RD_STATS dummy_rdc;
624   SIMPLE_MOTION_DATA_TREE *const sms_root = td->sms_root;
625 
626 #if CONFIG_REALTIME_ONLY
627   (void)seg_skip;
628 #endif  // CONFIG_REALTIME_ONLY
629 
630   init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
631                     1);
632 
633   // Encode the superblock
634   if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
635 #if CONFIG_COLLECT_COMPONENT_TIMING
636     start_timing(cpi, rd_use_partition_time);
637 #endif
638     // partition search starting from a variance-based partition
639     av1_set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col,
640                                        sb_size);
641     av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
642     PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
643     av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
644                          &dummy_rate, &dummy_dist, 1, pc_root);
645     av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
646 #if CONFIG_COLLECT_COMPONENT_TIMING
647     end_timing(cpi, rd_use_partition_time);
648 #endif
649   }
650 #if !CONFIG_REALTIME_ONLY
651   else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
652     // partition search by adjusting a fixed-size partition
653     av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
654     const BLOCK_SIZE bsize =
655         seg_skip ? sb_size : sf->part_sf.fixed_partition_size;
656     av1_set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
657     PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
658     av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
659                          &dummy_rate, &dummy_dist, 1, pc_root);
660     av1_free_pc_tree_recursive(pc_root, num_planes, 0, 0);
661   } else {
662     // The most exhaustive recursive partition search
663     SuperBlockEnc *sb_enc = &x->sb_enc;
664     // No stats for overlay frames. Exclude key frame.
665     av1_get_tpl_stats_sb(cpi, sb_size, mi_row, mi_col, sb_enc);
666 
667     // Reset the tree for simple motion search data
668     av1_reset_simple_motion_tree_partition(sms_root, sb_size);
669 
670 #if CONFIG_COLLECT_COMPONENT_TIMING
671     start_timing(cpi, rd_pick_partition_time);
672 #endif
673 
674     // Estimate the maximum square partition block size, which will be used
675     // as the starting block size for partitioning the sb
676     set_max_min_partition_size(sb_enc, cpi, x, sf, sb_size, mi_row, mi_col);
677 
678     // The superblock can be searched only once, or twice consecutively for
679     // better quality. Note that the meaning of passes here is different from
680     // the general concept of 1-pass/2-pass encoders.
681     const int num_passes =
682         cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;
683 
684     if (num_passes == 1) {
685 #if CONFIG_PARTITION_SEARCH_ORDER
686       if (cpi->ext_part_controller.ready && !frame_is_intra_only(cm)) {
687         av1_reset_part_sf(&cpi->sf.part_sf);
688         RD_STATS this_rdc;
689         av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row,
690                                 mi_col, sb_size, &this_rdc);
691       } else {
692         PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
693         av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
694                               &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL,
695                               SB_SINGLE_PASS, NULL);
696       }
697 #else
698       PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
699       av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
700                             &dummy_rdc, dummy_rdc, pc_root, sms_root, NULL,
701                             SB_SINGLE_PASS, NULL);
702 #endif  // CONFIG_PARTITION_SEARCH_ORDER
703     } else {
704       // First pass
705       SB_FIRST_PASS_STATS sb_fp_stats;
706       av1_backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
707       PC_TREE *const pc_root_p0 = av1_alloc_pc_tree_node(sb_size);
708       av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
709                             &dummy_rdc, dummy_rdc, pc_root_p0, sms_root, NULL,
710                             SB_DRY_PASS, NULL);
711 
712       // Second pass
713       init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
714                         mi_col, 0);
715       av1_reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
716       av1_reset_simple_motion_tree_partition(sms_root, sb_size);
717 
718       av1_restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
719 
720       PC_TREE *const pc_root_p1 = av1_alloc_pc_tree_node(sb_size);
721       av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
722                             &dummy_rdc, dummy_rdc, pc_root_p1, sms_root, NULL,
723                             SB_WET_PASS, NULL);
724     }
725     // Reset to 0 so that it wouldn't be used elsewhere mistakenly.
726     sb_enc->tpl_data_count = 0;
727 #if CONFIG_COLLECT_COMPONENT_TIMING
728     end_timing(cpi, rd_pick_partition_time);
729 #endif
730   }
731 #endif  // !CONFIG_REALTIME_ONLY
732 
733   // Update the inter rd model
734   // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
735   if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
736       cm->tiles.cols == 1 && cm->tiles.rows == 1) {
737     av1_inter_mode_data_fit(tile_data, x->rdmult);
738   }
739 }
740 
is_rtc_mode(const CostUpdateFreq * cost_upd_freq,MODE mode)741 static AOM_INLINE int is_rtc_mode(const CostUpdateFreq *cost_upd_freq,
742                                   MODE mode) {
743   return ((mode == REALTIME) && cost_upd_freq->coeff >= 2 &&
744           cost_upd_freq->mode >= 2 && cost_upd_freq->mv >= 2 &&
745           cost_upd_freq->dv >= 2);
746 }
747 
748 /*!\brief Encode a superblock row by breaking it into superblocks
749  *
750  * \ingroup partition_search
751  * \callgraph
752  * \callergraph
753  * Do partition and mode search for an sb row: one row of superblocks filling up
754  * the width of the current tile.
755  */
static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
                                     TileDataEnc *tile_data, int mi_row,
                                     TokenExtra **tp) {
  AV1_COMMON *const cm = &cpi->common;
  const TileInfo *const tile_info = &tile_data->tile_info;
  MultiThreadInfo *const mt_info = &cpi->mt_info;
  AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
  AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
  bool row_mt_enabled = mt_info->row_mt_enabled;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int mib_size = cm->seq_params->mib_size;
  const int mib_size_log2 = cm->seq_params->mib_size_log2;
  // Index of this superblock row within the tile.
  const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
  const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  const CostUpdateFreq *const cost_upd_freq = &cpi->oxcf.cost_upd_freq;
  const int rtc_mode = is_rtc_mode(cost_upd_freq, cpi->oxcf.mode);

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, encode_sb_row_time);
#endif

  // Initialize the left context for the new SB row
  av1_zero_left_context(xd);

  // Reset deltas for quantizer and loop filters at the beginning of every tile
  // row; with row multi-threading the reset happens at the start of every row.
  if (mi_row == tile_info->mi_row_start || row_mt_enabled) {
    if (cm->delta_q_info.delta_q_present_flag)
      xd->current_base_qindex = cm->quant_params.base_qindex;
    if (cm->delta_q_info.delta_lf_present_flag) {
      av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
    }
  }

  reset_thresh_freq_fact(x);

  // Code each SB in the row
  for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
       mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
    // In realtime mode and when frequency of cost updates is off/tile, wait for
    // the top superblock to finish encoding. Otherwise, wait for the top-right
    // superblock to finish encoding.
    (*(enc_row_mt->sync_read_ptr))(row_mt_sync, sb_row,
                                   sb_col_in_tile - rtc_mode);
    const int update_cdf = tile_data->allow_update_cdf && row_mt_enabled;
    if (update_cdf && (tile_info->mi_row_start != mi_row)) {
      if ((tile_info->mi_col_start == mi_col)) {
        // restore frame context at the 1st column sb
        memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
      } else {
        // Update context by averaging the current CDFs with those saved by
        // the row above (top-right SB, or top SB at the last column).
        int wt_left = AVG_CDF_WEIGHT_LEFT;
        int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
        if (tile_info->mi_col_end > (mi_col + mib_size))
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile,
                              wt_left, wt_tr);
        else
          av1_avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
                              wt_left, wt_tr);
      }
    }

    // Update the rate cost tables for some symbols
    av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);

    // Reset color coding related parameters
    x->color_sensitivity_sb[0] = 0;
    x->color_sensitivity_sb[1] = 0;
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->content_state_sb.source_sad = kMedSad;
    x->content_state_sb.lighting_change = 0;
    x->content_state_sb.low_sumdiff = 0;

    xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
    x->source_variance = UINT_MAX;
    td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);

    // Get segment id and skip flag
    const struct segmentation *const seg = &cm->seg;
    int seg_skip = 0;
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      const int segment_id =
          map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
              : 0;
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    // Produce the gradient data at superblock level, when intra mode pruning
    // based on hog is enabled.
    if (cpi->sf.intra_sf.intra_pruning_with_hog ||
        cpi->sf.intra_sf.chroma_intra_pruning_with_hog)
      produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);

    // encode the superblock
    if (use_nonrd_mode) {
      encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    } else {
      encode_rd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
    }

    // Update the top-right context in row_mt coding
    if (update_cdf && (tile_info->mi_row_end > (mi_row + mib_size))) {
      if (sb_cols_in_tile == 1)
        memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
      else if (sb_col_in_tile >= 1)
        memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
               sizeof(*xd->tile_ctx));
    }
    // Signal that this superblock is finished so dependent rows may proceed.
    (*(enc_row_mt->sync_write_ptr))(row_mt_sync, sb_row, sb_col_in_tile,
                                    sb_cols_in_tile);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, encode_sb_row_time);
#endif
}
876 
init_encode_frame_mb_context(AV1_COMP * cpi)877 static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
878   AV1_COMMON *const cm = &cpi->common;
879   const int num_planes = av1_num_planes(cm);
880   MACROBLOCK *const x = &cpi->td.mb;
881   MACROBLOCKD *const xd = &x->e_mbd;
882 
883   // Copy data over into macro block data structures.
884   av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
885                        cm->seq_params->sb_size);
886 
887   av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
888                          cm->seq_params->subsampling_y, num_planes);
889 }
890 
// Allocate (or re-allocate) the per-tile encoder data array sized for the
// current tile layout, and record the new capacity in cpi->allocated_tiles.
// Any previous allocation is released first.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;

  // aom_free(), like free(), is a no-op on NULL, so no guard is needed.
  aom_free(cpi->tile_data);
  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));

  cpi->allocated_tiles = tile_cols * tile_rows;
}
903 
// Initialize per-tile encoder state for every tile in the frame: tile
// geometry, token buffer slices, CDF-update permission, and the starting
// entropy context (a copy of the frame context).
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  const int tile_rows = cm->tiles.rows;
  int tile_col, tile_row;
  TokenInfo *const token_info = &cpi->token_info;
  TokenExtra *pre_tok = token_info->tile_tok[0][0];
  TokenList *tplist = token_info->tplist[0][0];
  unsigned int tile_tok = 0;
  int tplist_count = 0;
  const CostUpdateFreq *const cost_upd_freq = &cpi->oxcf.cost_upd_freq;
  const int rtc_mode = is_rtc_mode(cost_upd_freq, cpi->oxcf.mode);

  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, tile_row, tile_col);
      tile_data->firstpass_top_mv = kZeroMv;
      tile_data->abs_sum_level = 0;

      // Carve this tile's slice out of the frame-level token buffers; the
      // running offsets (tile_tok / tplist_count) advance by the previous
      // tile's allocation.
      if (pre_tok != NULL && tplist != NULL) {
        token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
        pre_tok = token_info->tile_tok[tile_row][tile_col];
        tile_tok = allocated_tokens(
            *tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
            num_planes);
        token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
        tplist = token_info->tplist[tile_row][tile_col];
        tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
      }
      // CDF updates are disallowed for large-scale tiles, when the frame
      // disables CDF updates, or in realtime mode with infrequent cost
      // updates (see is_rtc_mode()).
      tile_data->allow_update_cdf = !cm->tiles.large_scale;
      tile_data->allow_update_cdf = tile_data->allow_update_cdf &&
                                    !cm->features.disable_cdf_update &&
                                    !rtc_mode;
      // Each tile starts coding from a copy of the current frame context.
      tile_data->tctx = *cm->fc;
    }
  }
}
945 
946 /*!\brief Encode a superblock row
947  *
948  * \ingroup partition_search
949  */
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tiles.cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  TokenExtra *tok = NULL;
  TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
  // Superblock row index within the tile.
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
  const int num_mb_rows_in_sb =
      ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;

  // Locate the token write position for this SB row.
  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
                cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
  assert(tplist != NULL);
  tplist[sb_row_in_tile].start = tok;

  encode_sb_row(cpi, td, this_tile, mi_row, &tok);

  // Record how many tokens this SB row produced.
  tplist[sb_row_in_tile].count =
      (unsigned int)(tok - tplist[sb_row_in_tile].start);

  // Sanity check: the row must not overrun its token allocation.
  assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
                         cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
                         num_planes));

  // Only referenced by the assert above; silence warnings in release builds.
  (void)tile_mb_cols;
  (void)num_mb_rows_in_sb;
}
984 
985 /*!\brief Encode a tile
986  *
987  * \ingroup partition_search
988  */
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  AV1_COMMON *const cm = &cpi->common;
  TileDataEnc *const this_tile =
      &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;

  // Inter-mode RD model data is only collected in the RD (non-realtime) path.
  if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);

  // Clear the above context for the columns this tile spans.
  av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
                         tile_info->mi_col_end, tile_row);
  av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
                         &td->mb.e_mbd);

  if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
    cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);

  // Reset the CRC calculator used for transform-block RD record hashing.
  if (td->mb.txfm_search_info.txb_rd_records != NULL) {
    av1_crc32c_calculator_init(
        &td->mb.txfm_search_info.txb_rd_records->mb_rd_record.crc_calculator);
  }

  // Encode the tile one superblock row at a time.
  for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
       mi_row += cm->seq_params->mib_size) {
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
  }
  // Persist the accumulated absolute sum level back into the tile data.
  this_tile->abs_sum_level = td->abs_sum_level;
}
1017 
1018 /*!\brief Break one frame into tiles and encode the tiles
1019  *
1020  * \ingroup partition_search
1021  *
1022  * \param[in]    cpi    Top-level encoder structure
1023  */
encode_tiles(AV1_COMP * cpi)1024 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
1025   AV1_COMMON *const cm = &cpi->common;
1026   const int tile_cols = cm->tiles.cols;
1027   const int tile_rows = cm->tiles.rows;
1028   int tile_col, tile_row;
1029 
1030   MACROBLOCK *const mb = &cpi->td.mb;
1031   assert(IMPLIES(cpi->tile_data == NULL,
1032                  cpi->allocated_tiles < tile_cols * tile_rows));
1033   if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
1034 
1035   av1_init_tile_data(cpi);
1036   av1_alloc_mb_data(cm, mb, cpi->sf.rt_sf.use_nonrd_pick_mode);
1037 
1038   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
1039     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
1040       TileDataEnc *const this_tile =
1041           &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
1042       cpi->td.intrabc_used = 0;
1043       cpi->td.deltaq_used = 0;
1044       cpi->td.abs_sum_level = 0;
1045       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
1046       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
1047       // Reset cyclic refresh counters.
1048       av1_init_cyclic_refresh_counters(&cpi->td.mb);
1049 
1050       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
1051       // Accumulate cyclic refresh params.
1052       if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
1053           !frame_is_intra_only(&cpi->common))
1054         av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
1055                                                &cpi->td.mb);
1056       cpi->intrabc_used |= cpi->td.intrabc_used;
1057       cpi->deltaq_used |= cpi->td.deltaq_used;
1058     }
1059   }
1060 
1061   av1_dealloc_mb_data(cm, mb);
1062 }
1063 
1064 // Set the relative distance of a reference frame w.r.t. current frame
set_rel_frame_dist(const AV1_COMMON * const cm,RefFrameDistanceInfo * const ref_frame_dist_info,const int ref_frame_flags)1065 static AOM_INLINE void set_rel_frame_dist(
1066     const AV1_COMMON *const cm, RefFrameDistanceInfo *const ref_frame_dist_info,
1067     const int ref_frame_flags) {
1068   MV_REFERENCE_FRAME ref_frame;
1069   int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
1070   ref_frame_dist_info->nearest_past_ref = NONE_FRAME;
1071   ref_frame_dist_info->nearest_future_ref = NONE_FRAME;
1072   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
1073     ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
1074     if (ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
1075       int dist = av1_encoder_get_relative_dist(
1076           cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
1077           cm->current_frame.display_order_hint);
1078       ref_frame_dist_info->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
1079       // Get the nearest ref_frame in the past
1080       if (abs(dist) < min_past_dist && dist < 0) {
1081         ref_frame_dist_info->nearest_past_ref = ref_frame;
1082         min_past_dist = abs(dist);
1083       }
1084       // Get the nearest ref_frame in the future
1085       if (dist < min_future_dist && dist > 0) {
1086         ref_frame_dist_info->nearest_future_ref = ref_frame;
1087         min_future_dist = dist;
1088       }
1089     }
1090   }
1091 }
1092 
refs_are_one_sided(const AV1_COMMON * cm)1093 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
1094   assert(!frame_is_intra_only(cm));
1095 
1096   int one_sided_refs = 1;
1097   const int cur_display_order_hint = cm->current_frame.display_order_hint;
1098   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
1099     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
1100     if (buf == NULL) continue;
1101     if (av1_encoder_get_relative_dist(buf->display_order_hint,
1102                                       cur_display_order_hint) > 0) {
1103       one_sided_refs = 0;  // bwd reference
1104       break;
1105     }
1106   }
1107   return one_sided_refs;
1108 }
1109 
get_skip_mode_ref_offsets(const AV1_COMMON * cm,int ref_order_hint[2])1110 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
1111                                              int ref_order_hint[2]) {
1112   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
1113   ref_order_hint[0] = ref_order_hint[1] = 0;
1114   if (!skip_mode_info->skip_mode_allowed) return;
1115 
1116   const RefCntBuffer *const buf_0 =
1117       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
1118   const RefCntBuffer *const buf_1 =
1119       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
1120   assert(buf_0 != NULL && buf_1 != NULL);
1121 
1122   ref_order_hint[0] = buf_0->order_hint;
1123   ref_order_hint[1] = buf_1->order_hint;
1124 }
1125 
// Decide whether skip mode should remain enabled for the current frame.
// Returns 1 to keep skip mode, 0 to turn it off.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  const int cur_offset = (int)cm->current_frame.order_hint;
  int ref_offset[2];
  get_skip_mode_ref_offsets(cm, ref_offset);
  // NOTE(review): cur_to_ref0 is used as-is while cur_to_ref1 is wrapped in
  // abs(); presumably ref 0 is always on the past side so its distance is
  // already non-negative -- confirm against av1_setup_skip_mode_allowed().
  const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
                                            cur_offset, ref_offset[0]);
  const int cur_to_ref1 = abs(get_relative_dist(
      &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
  if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.gf_cfg.lag_in_frames > 0) return 0;

  // Per-reference enable flags, indexed by MV_REFERENCE_FRAME.
  static const int flag_list[REF_FRAMES] = { 0,
                                             AOM_LAST_FLAG,
                                             AOM_LAST2_FLAG,
                                             AOM_LAST3_FLAG,
                                             AOM_GOLD_FLAG,
                                             AOM_BWD_FLAG,
                                             AOM_ALT2_FLAG,
                                             AOM_ALT_FLAG };
  const int ref_frame[2] = {
    cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
    cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
  };
  // Both skip-mode references must be enabled for this frame.
  if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) ||
      !(cpi->ref_frame_flags & flag_list[ref_frame[1]]))
    return 0;

  return 1;
}
1164 
set_default_interp_skip_flags(const AV1_COMMON * cm,InterpSearchFlags * interp_search_flags)1165 static AOM_INLINE void set_default_interp_skip_flags(
1166     const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
1167   const int num_planes = av1_num_planes(cm);
1168   interp_search_flags->default_interp_skip_flags =
1169       (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
1170                         : INTERP_SKIP_LUMA_SKIP_CHROMA;
1171 }
1172 
// Populate cpi->prune_ref_frame_mask with compound reference pairs that
// should be excluded from the mode search for the current frame.
static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
  if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
       cpi->sf.inter_sf.disable_onesided_comp) &&
      cpi->all_one_sided_refs) {
    // Disable all compound references
    cpi->prune_ref_frame_mask = (1 << MODE_CTX_REF_FRAMES) - (1 << REF_FRAMES);
  } else if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
             cpi->sf.inter_sf.selective_ref_frame >= 2) {
    AV1_COMMON *const cm = &cpi->common;
    const int cur_frame_display_order_hint =
        cm->current_frame.display_order_hint;
    unsigned int *ref_display_order_hint =
        cm->cur_frame->ref_display_order_hint;
    // Signed distances of ALTREF2 and BWDREF to the current frame; used by
    // the selective_ref_frame >= 4 rule below.
    const int arf2_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);
    const int bwd_dist = av1_encoder_get_relative_dist(
        ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
        cur_frame_display_order_hint);

    // Iterate over all compound (two-reference) entries.
    for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_idx);
      // Skip pairs for which either reference is not enabled.
      if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
          !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
        continue;
      }

      if (!cpi->all_one_sided_refs) {
        int ref_dist[2];
        for (int i = 0; i < 2; ++i) {
          ref_dist[i] = av1_encoder_get_relative_dist(
              ref_display_order_hint[rf[i] - LAST_FRAME],
              cur_frame_display_order_hint);
        }

        // One-sided compound is used only when all reference frames are
        // one-sided.
        if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }

      if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
          (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
          (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
        // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
        if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
          // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
          // reference to the current frame than ALTREF2_FRAME
          cpi->prune_ref_frame_mask |= 1 << ref_idx;
        }
      }
    }
  }
}
1229 
/*!\brief Encoder setup (only for the current frame), encoding, and reconstruction
1231  * for a single frame
1232  *
1233  * \ingroup high_level_algo
1234  */
encode_frame_internal(AV1_COMP * cpi)1235 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
1236   ThreadData *const td = &cpi->td;
1237   MACROBLOCK *const x = &td->mb;
1238   AV1_COMMON *const cm = &cpi->common;
1239   CommonModeInfoParams *const mi_params = &cm->mi_params;
1240   FeatureFlags *const features = &cm->features;
1241   MACROBLOCKD *const xd = &x->e_mbd;
1242   RD_COUNTS *const rdc = &cpi->td.rd_counts;
1243   FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
1244   IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
1245   MultiThreadInfo *const mt_info = &cpi->mt_info;
1246   AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
1247   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1248   const DELTAQ_MODE deltaq_mode = oxcf->q_cfg.deltaq_mode;
1249   int i;
1250 
1251   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
1252     mi_params->setup_mi(mi_params);
1253   }
1254 
1255   set_mi_offsets(mi_params, xd, 0, 0);
1256 
1257   av1_zero(*td->counts);
1258   av1_zero(rdc->comp_pred_diff);
1259   av1_zero(rdc->tx_type_used);
1260   av1_zero(rdc->obmc_used);
1261   av1_zero(rdc->warped_used);
1262 
1263   // Reset the flag.
1264   cpi->intrabc_used = 0;
1265   // Need to disable intrabc when superres is selected
1266   if (av1_superres_scaled(cm)) {
1267     features->allow_intrabc = 0;
1268   }
1269 
1270   features->allow_intrabc &= (oxcf->kf_cfg.enable_intrabc);
1271 
1272   if (features->allow_warped_motion &&
1273       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1274     const FRAME_UPDATE_TYPE update_type =
1275         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1276     if (frame_probs->warped_probs[update_type] <
1277         cpi->sf.inter_sf.prune_warped_prob_thresh)
1278       features->allow_warped_motion = 0;
1279   }
1280 
1281   int hash_table_created = 0;
1282   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
1283       !cpi->sf.rt_sf.use_nonrd_pick_mode) {
1284     // TODO(any): move this outside of the recoding loop to avoid recalculating
1285     // the hash table.
1286     // add to hash table
1287     const int pic_width = cpi->source->y_crop_width;
1288     const int pic_height = cpi->source->y_crop_height;
1289     uint32_t *block_hash_values[2][2];
1290     int8_t *is_block_same[2][3];
1291     int k, j;
1292 
1293     for (k = 0; k < 2; k++) {
1294       for (j = 0; j < 2; j++) {
1295         CHECK_MEM_ERROR(cm, block_hash_values[k][j],
1296                         aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
1297       }
1298 
1299       for (j = 0; j < 3; j++) {
1300         CHECK_MEM_ERROR(cm, is_block_same[k][j],
1301                         aom_malloc(sizeof(int8_t) * pic_width * pic_height));
1302       }
1303     }
1304 
1305     av1_hash_table_init(intrabc_hash_info);
1306     av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table);
1307     hash_table_created = 1;
1308     av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
1309                                       block_hash_values[0], is_block_same[0]);
1310     // Hash data generated for screen contents is used for intraBC ME
1311     const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
1312     const int max_sb_size =
1313         (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
1314     int src_idx = 0;
1315     for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
1316       const int dst_idx = !src_idx;
1317       av1_generate_block_hash_value(
1318           intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
1319           block_hash_values[dst_idx], is_block_same[src_idx],
1320           is_block_same[dst_idx]);
1321       if (size >= min_alloc_size) {
1322         av1_add_to_hash_map_by_row_with_precal_data(
1323             &intrabc_hash_info->intrabc_hash_table, block_hash_values[dst_idx],
1324             is_block_same[dst_idx][2], pic_width, pic_height, size);
1325       }
1326     }
1327 
1328     for (k = 0; k < 2; k++) {
1329       for (j = 0; j < 2; j++) {
1330         aom_free(block_hash_values[k][j]);
1331       }
1332 
1333       for (j = 0; j < 3; j++) {
1334         aom_free(is_block_same[k][j]);
1335       }
1336     }
1337   }
1338 
1339   const CommonQuantParams *quant_params = &cm->quant_params;
1340   for (i = 0; i < MAX_SEGMENTS; ++i) {
1341     const int qindex =
1342         cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
1343                         : quant_params->base_qindex;
1344     xd->lossless[i] =
1345         qindex == 0 && quant_params->y_dc_delta_q == 0 &&
1346         quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
1347         quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
1348     if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
1349     xd->qindex[i] = qindex;
1350     if (xd->lossless[i]) {
1351       cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
1352     } else {
1353       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
1354     }
1355   }
1356   features->coded_lossless = is_coded_lossless(cm, xd);
1357   features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
1358 
1359   // Fix delta q resolution for the moment
1360   cm->delta_q_info.delta_q_res = 0;
1361   if (cpi->oxcf.q_cfg.aq_mode != CYCLIC_REFRESH_AQ) {
1362     if (deltaq_mode == DELTA_Q_OBJECTIVE)
1363       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
1364     else if (deltaq_mode == DELTA_Q_PERCEPTUAL)
1365       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1366     else if (deltaq_mode == DELTA_Q_PERCEPTUAL_AI)
1367       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1368     else if (deltaq_mode == DELTA_Q_USER_RATING_BASED)
1369       cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
1370 
1371     // Set delta_q_present_flag before it is used for the first time
1372     cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
1373     cm->delta_q_info.delta_q_present_flag = deltaq_mode != NO_DELTA_Q;
1374 
1375     // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q
1376     // is used for ineligible frames. That effectively will turn off row_mt
1377     // usage. Note objective delta_q and tpl eligible frames are only altref
1378     // frames currently.
1379     const GF_GROUP *gf_group = &cpi->ppi->gf_group;
1380     if (cm->delta_q_info.delta_q_present_flag) {
1381       if (deltaq_mode == DELTA_Q_OBJECTIVE &&
1382           !is_frame_tpl_eligible(gf_group, cpi->gf_frame_index))
1383         cm->delta_q_info.delta_q_present_flag = 0;
1384     }
1385 
1386     // Reset delta_q_used flag
1387     cpi->deltaq_used = 0;
1388 
1389     cm->delta_q_info.delta_lf_present_flag =
1390         cm->delta_q_info.delta_q_present_flag &&
1391         oxcf->tool_cfg.enable_deltalf_mode;
1392     cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
1393 
1394     // update delta_q_present_flag and delta_lf_present_flag based on
1395     // base_qindex
1396     cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
1397     cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
1398   } else {
1399     cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
1400     cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
1401     cpi->cyclic_refresh->cnt_zeromv = 0;
1402   }
1403 
1404   av1_frame_init_quantizer(cpi);
1405 
1406   init_encode_frame_mb_context(cpi);
1407   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
1408   if (cm->prev_frame && cm->prev_frame->seg.enabled)
1409     cm->last_frame_seg_map = cm->prev_frame->seg_map;
1410   else
1411     cm->last_frame_seg_map = NULL;
1412   if (features->allow_intrabc || features->coded_lossless) {
1413     av1_set_default_ref_deltas(cm->lf.ref_deltas);
1414     av1_set_default_mode_deltas(cm->lf.mode_deltas);
1415   } else if (cm->prev_frame) {
1416     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
1417     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
1418   }
1419   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
1420   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
1421 
1422   cpi->all_one_sided_refs =
1423       frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
1424 
1425   cpi->prune_ref_frame_mask = 0;
1426   // Figure out which ref frames can be skipped at frame level.
1427   setup_prune_ref_frame_mask(cpi);
1428 
1429   x->txfm_search_info.txb_split_count = 0;
1430 #if CONFIG_SPEED_STATS
1431   x->txfm_search_info.tx_search_count = 0;
1432 #endif  // CONFIG_SPEED_STATS
1433 
1434 #if !CONFIG_REALTIME_ONLY
1435 #if CONFIG_COLLECT_COMPONENT_TIMING
1436   start_timing(cpi, av1_compute_global_motion_time);
1437 #endif
1438   av1_compute_global_motion_facade(cpi);
1439 #if CONFIG_COLLECT_COMPONENT_TIMING
1440   end_timing(cpi, av1_compute_global_motion_time);
1441 #endif
1442 #endif  // !CONFIG_REALTIME_ONLY
1443 
1444 #if CONFIG_COLLECT_COMPONENT_TIMING
1445   start_timing(cpi, av1_setup_motion_field_time);
1446 #endif
1447   av1_calculate_ref_frame_side(cm);
1448   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
1449 #if CONFIG_COLLECT_COMPONENT_TIMING
1450   end_timing(cpi, av1_setup_motion_field_time);
1451 #endif
1452 
1453   cm->current_frame.skip_mode_info.skip_mode_flag =
1454       check_skip_mode_enabled(cpi);
1455 
1456   // Initialization of skip mode cost depends on the value of
1457   // 'skip_mode_flag'. This initialization happens in the function
1458   // av1_fill_mode_rates(), which is in turn called in
1459   // av1_initialize_rd_consts(). Thus, av1_initialize_rd_consts()
1460   // has to be called after 'skip_mode_flag' is initialized.
1461   av1_initialize_rd_consts(cpi);
1462   av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
1463 
1464   enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
1465   enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
1466   mt_info->row_mt_enabled = 0;
1467 
1468   if (oxcf->row_mt && (mt_info->num_workers > 1)) {
1469     mt_info->row_mt_enabled = 1;
1470     enc_row_mt->sync_read_ptr = av1_row_mt_sync_read;
1471     enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
1472     av1_encode_tiles_row_mt(cpi);
1473   } else {
1474     if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1)
1475       av1_encode_tiles_mt(cpi);
1476     else
1477       encode_tiles(cpi);
1478   }
1479 
1480   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
1481   if (features->allow_intrabc && !cpi->intrabc_used) {
1482     features->allow_intrabc = 0;
1483   }
1484   if (features->allow_intrabc) {
1485     cm->delta_q_info.delta_lf_present_flag = 0;
1486   }
1487 
1488   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
1489     cm->delta_q_info.delta_q_present_flag = 0;
1490   }
1491 
1492   // Set the transform size appropriately before bitstream creation
1493   const MODE_EVAL_TYPE eval_type =
1494       cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
1495           ? WINNER_MODE_EVAL
1496           : DEFAULT_EVAL;
1497   const TX_SIZE_SEARCH_METHOD tx_search_type =
1498       cpi->winner_mode_params.tx_size_search_methods[eval_type];
1499   assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
1500   features->tx_mode = select_tx_mode(cm, tx_search_type);
1501 
1502 #if CONFIG_FRAME_PARALLEL_ENCODE
1503   // Retain the frame level probability update conditions for parallel frames.
1504   // These conditions will be consumed during postencode stage to update the
1505   // probability.
1506   if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1507     cpi->do_update_frame_probs_txtype[cpi->num_frame_recode] =
1508         cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats;
1509     cpi->do_update_frame_probs_obmc[cpi->num_frame_recode] =
1510         (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1511          cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX);
1512     cpi->do_update_frame_probs_warp[cpi->num_frame_recode] =
1513         (features->allow_warped_motion &&
1514          cpi->sf.inter_sf.prune_warped_prob_thresh > 0);
1515     cpi->do_update_frame_probs_interpfilter[cpi->num_frame_recode] =
1516         (cm->current_frame.frame_type != KEY_FRAME &&
1517          cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
1518          features->interp_filter == SWITCHABLE);
1519   }
1520 #endif
1521 
1522   if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
1523       ((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
1524         INT_MAX) &&
1525        (cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh != 0))) {
1526     const FRAME_UPDATE_TYPE update_type =
1527         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1528     for (i = 0; i < TX_SIZES_ALL; i++) {
1529       int sum = 0;
1530       int j;
1531       int left = MAX_TX_TYPE_PROB;
1532 
1533       for (j = 0; j < TX_TYPES; j++)
1534         sum += cpi->td.rd_counts.tx_type_used[i][j];
1535 
1536       for (j = TX_TYPES - 1; j >= 0; j--) {
1537         int update_txtype_frameprobs = 1;
1538         const int new_prob =
1539             sum ? MAX_TX_TYPE_PROB * cpi->td.rd_counts.tx_type_used[i][j] / sum
1540                 : (j ? 0 : MAX_TX_TYPE_PROB);
1541 #if CONFIG_FRAME_PARALLEL_ENCODE
1542         // Track the frame probabilities of parallel encode frames to update
1543         // during postencode stage.
1544         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1545           update_txtype_frameprobs = 0;
1546           cpi->frame_new_probs[cpi->num_frame_recode]
1547               .tx_type_probs[update_type][i][j] = new_prob;
1548         }
1549 #endif  // CONFIG_FRAME_PARALLEL_ENCODE
1550         if (update_txtype_frameprobs) {
1551           int prob =
1552               (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
1553           left -= prob;
1554           if (j == 0) prob += left;
1555           frame_probs->tx_type_probs[update_type][i][j] = prob;
1556         }
1557       }
1558     }
1559   }
1560 
1561   if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
1562       cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
1563     const FRAME_UPDATE_TYPE update_type =
1564         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1565 
1566     for (i = 0; i < BLOCK_SIZES_ALL; i++) {
1567       int sum = 0;
1568       int update_obmc_frameprobs = 1;
1569       for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
1570 
1571       const int new_prob =
1572           sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
1573 #if CONFIG_FRAME_PARALLEL_ENCODE
1574       // Track the frame probabilities of parallel encode frames to update
1575       // during postencode stage.
1576       if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1577         update_obmc_frameprobs = 0;
1578         cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
1579             new_prob;
1580       }
1581 #endif  // CONFIG_FRAME_PARALLEL_ENCODE
1582       if (update_obmc_frameprobs) {
1583         frame_probs->obmc_probs[update_type][i] =
1584             (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
1585       }
1586     }
1587   }
1588 
1589   if (features->allow_warped_motion &&
1590       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
1591     const FRAME_UPDATE_TYPE update_type =
1592         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1593     int update_warp_frameprobs = 1;
1594     int sum = 0;
1595     for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
1596     const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
1597 #if CONFIG_FRAME_PARALLEL_ENCODE
1598     // Track the frame probabilities of parallel encode frames to update
1599     // during postencode stage.
1600     if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1601       update_warp_frameprobs = 0;
1602       cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
1603           new_prob;
1604     }
1605 #endif  // CONFIG_FRAME_PARALLEL_ENCODE
1606     if (update_warp_frameprobs) {
1607       frame_probs->warped_probs[update_type] =
1608           (frame_probs->warped_probs[update_type] + new_prob) >> 1;
1609     }
1610   }
1611 
1612   if (cm->current_frame.frame_type != KEY_FRAME &&
1613       cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
1614       features->interp_filter == SWITCHABLE) {
1615     const FRAME_UPDATE_TYPE update_type =
1616         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1617 
1618     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
1619       int sum = 0;
1620       int j;
1621       int left = 1536;
1622 
1623       for (j = 0; j < SWITCHABLE_FILTERS; j++) {
1624         sum += cpi->td.counts->switchable_interp[i][j];
1625       }
1626 
1627       for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
1628         int update_interpfilter_frameprobs = 1;
1629         const int new_prob =
1630             sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
1631                 : (j ? 0 : 1536);
1632 #if CONFIG_FRAME_PARALLEL_ENCODE
1633         // Track the frame probabilities of parallel encode frames to update
1634         // during postencode stage.
1635         if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
1636           update_interpfilter_frameprobs = 0;
1637           cpi->frame_new_probs[cpi->num_frame_recode]
1638               .switchable_interp_probs[update_type][i][j] = new_prob;
1639         }
1640 #endif  // CONFIG_FRAME_PARALLEL_ENCODE
1641         if (update_interpfilter_frameprobs) {
1642           int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
1643                       new_prob) >>
1644                      1;
1645           left -= prob;
1646           if (j == 0) prob += left;
1647           frame_probs->switchable_interp_probs[update_type][i][j] = prob;
1648         }
1649       }
1650     }
1651   }
1652   if (hash_table_created) {
1653     av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
1654   }
1655 }
1656 
1657 /*!\brief Setup reference frame buffers and encode a frame
1658  *
1659  * \ingroup high_level_algo
1660  * \callgraph
1661  * \callergraph
1662  *
1663  * \param[in]    cpi    Top-level encoder structure
1664  */
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const cur_frame = &cm->current_frame;
  FeatureFlags *const feat = &cm->features;
  const int plane_count = av1_num_planes(cm);

  // Use the reduced ext-tx set rather than the potential full set of 16
  // transforms when the configuration requests it.
  feat->reduced_tx_set_used = cpi->oxcf.txfm_cfg.reduced_tx_type_set;

  // Clamp every entry of the encoder segmentation map so that no
  // segment_id exceeds last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int rows = cm->mi_params.mi_rows;
    const int cols = cm->mi_params.mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *row = cpi->enc_seg.map;
    for (int r = 0; r < rows; ++r, row += cols) {
      for (int c = 0; c < cols; ++c) {
        row[c] = AOMMIN(row[c], last_active_segid);
      }
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags,
                         cm->cur_frame->ref_display_order_hint,
                         cm->current_frame.display_order_hint);
  set_rel_frame_dist(&cpi->common, &cpi->ref_frame_dist_info,
                     cpi->ref_frame_flags);
  av1_setup_frame_sign_bias(cm);

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(plane_count);
#else
  (void)plane_count;
#endif

  if (!cpi->sf.hl_sf.frame_parameter_update &&
      !cpi->sf.rt_sf.use_comp_ref_nonrd) {
    // This is needed if real-time speed setting is changed on the fly
    // from one using compound prediction to one using single reference.
    if (cur_frame->reference_mode == REFERENCE_MODE_SELECT)
      cur_frame->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
    return;
  }

  RD_COUNTS *const rd_counts = &cpi->td.rd_counts;

  cur_frame->reference_mode =
      frame_is_intra_only(cm) ? SINGLE_REFERENCE : REFERENCE_MODE_SELECT;

  feat->interp_filter =
      cm->tiles.large_scale ? EIGHTTAP_REGULAR : SWITCHABLE;
  feat->switchable_motion_mode = 1;

  rd_counts->compound_ref_used_flag = 0;
  rd_counts->skip_mode_used_flag = 0;

  encode_frame_internal(cpi);

  // Fall back to single reference when no block picked compound prediction.
  // The flag below also accounts for 4x4 blocks.
  if (cur_frame->reference_mode == REFERENCE_MODE_SELECT &&
      rd_counts->compound_ref_used_flag == 0) {
    cur_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
    av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
  }

  // Re-check the skip mode status as the reference mode may have been
  // changed just above.
  SkipModeInfo *const smi = &cur_frame->skip_mode_info;
  if (frame_is_intra_only(cm) ||
      cur_frame->reference_mode == SINGLE_REFERENCE) {
    smi->skip_mode_allowed = 0;
    smi->skip_mode_flag = 0;
  }
  if (smi->skip_mode_flag && rd_counts->skip_mode_used_flag == 0)
    smi->skip_mode_flag = 0;

  // If transform-size selection never split a block, the search was wasted
  // effort: lock the frame to the largest transform size.
  if (!cm->tiles.large_scale && feat->tx_mode == TX_MODE_SELECT &&
      cpi->td.mb.txfm_search_info.txb_split_count == 0) {
    feat->tx_mode = TX_MODE_LARGEST;
  }
}
1754