/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/corner_detect.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
                                         TileDataEnc *tile_data, ThreadData *td,
                                         TOKENEXTRA **t, RUN_TYPE dry_run,
                                         BLOCK_SIZE bsize, int *rate);

// This is used as a reference when computing the source variance for the
//  purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
//  which will be faster.
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};

typedef struct {
  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
  TXFM_CONTEXT *p_ta;
  TXFM_CONTEXT *p_tl;
  TXFM_CONTEXT ta[MAX_MIB_SIZE];
  TXFM_CONTEXT tl[MAX_MIB_SIZE];
} RD_SEARCH_MACROBLOCK_CONTEXT;

enum { PICK_MODE_RD = 0, PICK_MODE_NONRD };

enum {
  SB_SINGLE_PASS,  // Single pass encoding: all ctxs get updated normally
  SB_DRY_PASS,     // First pass of multi-pass: does not update the ctxs
  SB_WET_PASS      // Second pass of multi-pass: finalize and update the ctx
} UENUM1BYTE(SB_MULTI_PASS_MODE);

// This struct is used to store the statistics used by sb-level multi-pass
// encoding. Currently, this is only used to make a copy of the state before
// we perform the first pass.
typedef struct SB_FIRST_PASS_STATS {
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
  RD_COUNTS rd_count;

  int split_count;
  FRAME_COUNTS fc;
  InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
  int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
  int current_qindex;

#if CONFIG_INTERNAL_STATS
  unsigned int mode_chosen_counts[MAX_MODES];
#endif  // CONFIG_INTERNAL_STATS
} SB_FIRST_PASS_STATS;

unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
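
// Worked example (editorial note, not from the original source): for a
// BLOCK_64X64 block, num_pels_log2_lookup[BLOCK_64X64] is 12 (4096 pixels),
// so a block variance of 40960 returned through vf() yields a per-pixel
// variance of ROUND_POWER_OF_TWO(40960, 12) == 10.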

unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  assert(bd == 8 || bd == 10 || bd == 12);
  const int off_index = (bd - 8) >> 1;
  const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                       AV1_HIGH_VAR_OFFS_10,
                                       AV1_HIGH_VAR_OFFS_12 };
  var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                         CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
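
// Editorial note: (bd - 8) >> 1 maps bit depth to the matching flat-reference
// table -- bd 8 -> AV1_HIGH_VAR_OFFS_8 (128), bd 10 -> AV1_HIGH_VAR_OFFS_10
// (128 * 4), bd 12 -> AV1_HIGH_VAR_OFFS_12 (128 * 16) -- i.e. mid-gray at
// each supported bit depth.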

static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last =
      get_ref_frame_yv12_buf(&cpi->common, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}
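
// Editorial note: the thresholds above translate the per-pixel variance of
// the difference against LAST_FRAME into a fixed partition size: nearly
// static 64x64 blocks (var < 8) keep the full block, while increasingly
// active content is forced down to 32x32, 16x16, or 8x8. A hypothetical
// caller (illustrative only):
//   const BLOCK_SIZE bs =
//       get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);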

static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
  const AV1_COMMON *const cm = &cpi->common;
  const CommonQuantParams *quant_params = &cm->quant_params;
  return av1_compute_rd_mult(cpi, quant_params->base_qindex + xd->delta_qindex +
                                      quant_params->y_dc_delta_q);
}

static AOM_INLINE void set_ssim_rdmult(const AV1_COMP *const cpi,
                                       MACROBLOCK *const x,
                                       const BLOCK_SIZE bsize, const int mi_row,
                                       const int mi_col, int *const rdmult) {
  const AV1_COMMON *const cm = &cpi->common;

  const int bsize_base = BLOCK_16X16;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
  int row, col;
  double num_of_mi = 0.0;
  double geom_mean_of_scale = 0.0;

  assert(cpi->oxcf.tuning == AOM_TUNE_SSIM);

  aom_clear_system_state();
  for (row = mi_row / num_mi_w;
       row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
    for (col = mi_col / num_mi_h;
         col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->ssim_rdmult_scaling_factors[index]);
      num_of_mi += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);

  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
  *rdmult = AOMMAX(*rdmult, 0);
  set_error_per_bit(x, *rdmult);
  aom_clear_system_state();
}
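
// Editorial sketch of the scaling above (the same pattern appears in
// get_hier_tpl_rdmult below): the block's rdmult is scaled by the geometric
// mean of the per-16x16 scaling factors s_i that cover it,
//   rdmult' = round(rdmult * exp(mean(log(s_i)))),
// which is less sensitive to a single outlier factor than an arithmetic
// mean would be.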

static int get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               const BLOCK_SIZE bsize, const int mi_row,
                               const int mi_col, int orig_rdmult) {
  const AV1_COMMON *const cm = &cpi->common;
  assert(IMPLIES(cpi->gf_group.size > 0,
                 cpi->gf_group.index < cpi->gf_group.size));
  const int tpl_idx = cpi->gf_group.index;
  const TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
  MACROBLOCKD *const xd = &x->e_mbd;
  const int deltaq_rdmult = set_deltaq_rdmult(cpi, xd);
  if (tpl_frame->is_valid == 0) return deltaq_rdmult;
  if (!is_frame_tpl_eligible((AV1_COMP *)cpi)) return deltaq_rdmult;
  if (tpl_idx >= MAX_LAG_BUFFERS) return deltaq_rdmult;
  if (cpi->superres_mode != SUPERRES_NONE) return deltaq_rdmult;
  if (cpi->oxcf.aq_mode != NO_AQ) return deltaq_rdmult;

  const int bsize_base = BLOCK_16X16;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
  int row, col;
  double base_block_count = 0.0;
  double geom_mean_of_scale = 0.0;
  aom_clear_system_state();
  for (row = mi_row / num_mi_w;
       row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
    for (col = mi_col / num_mi_h;
         col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->tpl_sb_rdmult_scaling_factors[index]);
      base_block_count += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);
  int rdmult = (int)((double)orig_rdmult * geom_mean_of_scale + 0.5);
  rdmult = AOMMAX(rdmult, 0);
  set_error_per_bit(x, rdmult);
  aom_clear_system_state();
  if (bsize == cm->seq_params.sb_size) {
    const int rdmult_sb = set_deltaq_rdmult(cpi, xd);
    assert(rdmult_sb == rdmult);
    (void)rdmult_sb;
  }
  return rdmult;
}

static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  const AV1_COMMON *const cm = &cpi->common;
  av1_init_plane_quantizers(cpi, x, segment_id);
  aom_clear_system_state();
  const int segment_qindex =
      av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
  return av1_compute_rd_mult(cpi,
                             segment_qindex + cm->quant_params.y_dc_delta_q);
}

static AOM_INLINE void setup_block_rdmult(const AV1_COMP *const cpi,
                                          MACROBLOCK *const x, int mi_row,
                                          int mi_col, BLOCK_SIZE bsize,
                                          AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
  x->rdmult = cpi->rd.RDMULT;

  if (aq_mode != NO_AQ) {
    assert(mbmi != NULL);
    if (aq_mode == VARIANCE_AQ) {
      if (cpi->vaq_refresh) {
        const int energy = bsize <= BLOCK_16X16
                               ? x->mb_energy
                               : av1_log_block_var(cpi, x, bsize);
        mbmi->segment_id = energy;
      }
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
    } else if (aq_mode == COMPLEXITY_AQ) {
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
    } else if (aq_mode == CYCLIC_REFRESH_AQ) {
      // If segment is boosted, use rdmult for that segment.
      if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
        x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
    }
  }

  const AV1_COMMON *const cm = &cpi->common;
  if (cm->delta_q_info.delta_q_present_flag &&
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
    x->rdmult = get_hier_tpl_rdmult(cpi, x, bsize, mi_row, mi_col, x->rdmult);
  }

  if (cpi->oxcf.tuning == AOM_TUNE_SSIM) {
    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }
#if CONFIG_TUNE_VMAF
  if (cpi->oxcf.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
      cpi->oxcf.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
    av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }
#endif
}

static AOM_INLINE void set_offsets_without_segment_id(
    const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x,
    int mi_row, int mi_col, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  assert(bsize < BLOCK_SIZES_ALL);
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                        mi_row, mi_col);

  set_entropy_context(xd, mi_row, mi_col, num_planes);
  xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  // Set up destination pointers.
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
                       num_planes);

  // Set up limit values for MV components.
  // MVs beyond the range do not produce new/different prediction blocks.
  av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height,
                    mi_width, cpi->oxcf.border_in_pixels);

  set_plane_n4(xd, mi_width, mi_height, num_planes);

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
                 cm->mi_params.mi_rows, cm->mi_params.mi_cols);

  // Set up source buffers.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);

  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
  xd->tile = *tile;
}

static AOM_INLINE void set_offsets(const AV1_COMP *const cpi,
                                   const TileInfo *const tile,
                                   MACROBLOCK *const x, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;

  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);

  // Setup segment ID.
  mbmi = xd->mi[0];
  mbmi->segment_id = 0;
  if (seg->enabled) {
    if (seg->enabled && !cpi->vaq_refresh) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      mbmi->segment_id =
          map ? get_segment_id(&cm->mi_params, map, bsize, mi_row, mi_col) : 0;
    }
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
  }
}

static AOM_INLINE void update_filter_type_count(FRAME_COUNTS *counts,
                                                const MACROBLOCKD *xd,
                                                const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    ++counts->switchable_interp[ctx][filter];
  }
}

static AOM_INLINE void update_filter_type_cdf(const MACROBLOCKD *xd,
                                              const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
               SWITCHABLE_FILTERS);
  }
}

static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode,
                                                 BLOCK_SIZE bsize,
                                                 const MB_MODE_INFO *mbmi,
                                                 RD_COUNTS *rdc) {
  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
    int ref;
    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
    }
  }
}

static AOM_INLINE void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
                                     const TX_MODE tx_mode) {
  MACROBLOCKD *const xd = &x->e_mbd;
  if (xd->lossless[mbmi->segment_id]) {
    mbmi->tx_size = TX_4X4;
  } else if (tx_mode != TX_MODE_SELECT) {
    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
  } else {
    BLOCK_SIZE bsize = mbmi->sb_type;
    TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
  }
  if (is_inter_block(mbmi)) {
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
  }
  const int stride = xd->tx_type_map_stride;
  const int bw = mi_size_wide[mbmi->sb_type];
  for (int row = 0; row < mi_size_high[mbmi->sb_type]; ++row) {
    memset(xd->tx_type_map + row * stride, DCT_DCT,
           bw * sizeof(xd->tx_type_map[0]));
  }
  av1_zero(x->blk_skip);
  x->force_skip = 0;
}

// This function will copy the best reference mode information from
// MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT.
static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
    MB_MODE_INFO_EXT *mbmi_ext,
    const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) {
  memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
  memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
  mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
  memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
         sizeof(mbmi_ext->global_mvs));
}

static AOM_INLINE void update_state(const AV1_COMP *const cpi, ThreadData *td,
                                    const PICK_MODE_CONTEXT *const ctx,
                                    int mi_row, int mi_col, BLOCK_SIZE bsize,
                                    RUN_TYPE dry_run) {
  int i, x_idx, y;
  const AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int num_planes = av1_num_planes(cm);
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const MB_MODE_INFO *const mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = mi_size_wide[mi->sb_type];
  const int bh = mi_size_high[mi->sb_type];
  const int mis = mi_params->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  assert(mi->sb_type == bsize);

  *mi_addr = *mi;
  copy_mbmi_ext_frame_to_mbmi_ext(x->mbmi_ext, &ctx->mbmi_ext_best,
                                  av1_ref_frame_type(ctx->mic.ref_frame));

  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

  x->force_skip = ctx->rd_stats.skip;

  xd->tx_type_map = ctx->tx_type_map;
  xd->tx_type_map_stride = mi_size_wide[bsize];
  // If not dry_run, copy the transform type data into the frame-level buffer.
  // The encoder will fetch tx types when writing the bitstream.
  if (!dry_run) {
    const int grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
    uint8_t *const tx_type_map = mi_params->tx_type_map + grid_idx;
    const int mi_stride = mi_params->mi_stride;
    for (int blk_row = 0; blk_row < bh; ++blk_row) {
      av1_copy_array(tx_type_map + blk_row * mi_stride,
                     xd->tx_type_map + blk_row * xd->tx_type_map_stride, bw);
    }
    xd->tx_type_map = tx_type_map;
    xd->tx_type_map_stride = mi_stride;
  }

  // If segmentation is in use:
  if (seg->enabled) {
    // For in-frame complexity AQ, copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          map ? get_segment_id(mi_params, map, bsize, mi_row, mi_col) : 0;
      reset_tx_size(x, mi_addr, x->tx_mode_search_type);
    }
    // Else, for cyclic refresh mode, update the segment map, set the segment
    // id, and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
                                        ctx->rd_stats.rate, ctx->rd_stats.dist,
                                        x->force_skip);
    }
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;
  }

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
  // Restore the coding context of the MB to that which was in place
  // when the mode was picked for it.
  for (y = 0; y < mi_height; y++) {
    for (x_idx = 0; x_idx < mi_width; x_idx++) {
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }
    }
  }

  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);

  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
    if (frame_is_intra_only(cm)) {
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH,   /*SMOOTH_PRED*/
        THR_SMOOTH_V, /*SMOOTH_V_PRED*/
        THR_SMOOTH_H, /*SMOOTH_H_PRED*/
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode is chosen as best.
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mi_addr)) {
      // TODO(sarahparker): global motion stats need to be handled per-tile
      // to be compatible with tile-based threading.
      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
    }

    if (cm->features.interp_filter == SWITCHABLE &&
        mi_addr->motion_mode != WARPED_CAUSAL &&
        !is_nontrans_global_motion(xd, xd->mi[0])) {
      update_filter_type_count(td->counts, xd, mi_addr);
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
  }

  const int x_mis = AOMMIN(bw, mi_params->mi_cols - mi_col);
  const int y_mis = AOMMIN(bh, mi_params->mi_rows - mi_row);
  if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
    av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}
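
// Usage sketch (editorial): set_offsets_without_segment_id() above calls
//   av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
// which points each x->plane[i].src at the (mi_row, mi_col) offset of the
// source frame, with chroma coordinates adjusted by each plane's subsampling.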

static EdgeInfo edge_info(const struct buf_2d *ref, const BLOCK_SIZE bsize,
                          const bool high_bd, const int bd) {
  const int width = block_size_wide[bsize];
  const int height = block_size_high[bsize];
  // Implementation requires width to be a multiple of 8. It also requires
  // height to be a multiple of 4, but this is always the case.
  assert(height % 4 == 0);
  if (width % 8 != 0) {
    EdgeInfo ei = { .magnitude = 0, .x = 0, .y = 0 };
    return ei;
  }
  return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
}

static int use_pb_simple_motion_pred_sse(const AV1_COMP *const cpi) {
  // TODO(debargha, yuec): Not in use, need to implement a speed feature
  // utilizing this data point, and replace '0' by the corresponding speed
  // feature flag.
  return 0 && !frame_is_intra_only(&cpi->common);
}

static void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
                                     RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  // TODO(jianj): Investigate the failure of ScalabilityTest in AOM_Q mode,
  // which sets base_qindex to 0 on keyframe.
  if (cpi->oxcf.rc_mode != AOM_CBR || !cpi->sf.rt_sf.hybrid_intra_pickmode ||
      bsize < BLOCK_16X16)
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  else
    av1_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
}

static AOM_INLINE void pick_sb_modes(AV1_COMP *const cpi,
                                     TileDataEnc *tile_data,
                                     MACROBLOCK *const x, int mi_row,
                                     int mi_col, RD_STATS *rd_cost,
                                     PARTITION_TYPE partition, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx, RD_STATS best_rd,
                                     int pick_mode_type) {
  if (best_rd.rdcost < 0) {
    ctx->rd_stats.rdcost = INT64_MAX;
    ctx->rd_stats.skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

  set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);

  if (ctx->rd_mode_is_ready) {
    assert(ctx->mic.sb_type == bsize);
    assert(ctx->mic.partition == partition);
    rd_cost->rate = ctx->rd_stats.rate;
    rd_cost->dist = ctx->rd_stats.dist;
    rd_cost->rdcost = ctx->rd_stats.rdcost;
    return;
  }

  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  int i;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  aom_clear_system_state();

  mbmi = xd->mi[0];
  mbmi->sb_type = bsize;
  mbmi->partition = partition;

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  xd->tx_type_map = x->tx_type_map;
  xd->tx_type_map_stride = mi_size_wide[bsize];

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  ctx->skippable = 0;
  // Set to zero to make sure we do not use the previously encoded frame stats.
  mbmi->skip = 0;
  // Reset skip mode flag.
  mbmi->skip_mode = 0;

  if (is_cur_buf_hbd(xd)) {
    x->source_variance = av1_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  if (use_pb_simple_motion_pred_sse(cpi)) {
    const FULLPEL_MV start_mv = kZeroFullMv;
    unsigned int var = 0;
    av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, start_mv, 0,
                              &x->simple_motion_pred_sse, &var);
  }

  // If the threshold for disabling wedge search is zero, it means the feature
  // should not be used. Use a value that will always succeed in the check.
  if (cpi->sf.inter_sf.disable_wedge_search_edge_thresh == 0) {
    x->edge_strength = UINT16_MAX;
    x->edge_strength_x = UINT16_MAX;
    x->edge_strength_y = UINT16_MAX;
  } else {
    EdgeInfo ei =
        edge_info(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
    x->edge_strength = ei.magnitude;
    x->edge_strength_x = ei.x;
    x->edge_strength_y = ei.y;
  }

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Save rdmult before it might be changed, so it can be restored later.
  const int orig_rdmult = x->rdmult;
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
  // Set error per bit for current rdmult
  set_error_per_bit(x, x->rdmult);
  av1_rd_cost_update(x->rdmult, &best_rd);

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    switch (pick_mode_type) {
      case PICK_MODE_RD:
        av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
        break;
      case PICK_MODE_NONRD:
        hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
        break;
      default: assert(0 && "Unknown pick mode type.");
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd.rdcost);
    } else {
      // TODO(kyslov): do the same for pick_inter_mode_sb_seg_skip
      switch (pick_mode_type) {
        case PICK_MODE_RD:
          av1_rd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx,
                                    best_rd.rdcost);
          break;
        case PICK_MODE_NONRD:
          av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx,
                                       best_rd.rdcost);
          break;
        default: assert(0 && "Unknown pick mode type.");
      }
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and, for AQ mode 2, make a segment choice.
  if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
      bsize >= BLOCK_16X16) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handling.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rd_stats.rate = rd_cost->rate;
  ctx->rd_stats.dist = rd_cost->dist;
  ctx->rd_stats.rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}

static AOM_INLINE void update_inter_mode_stats(FRAME_CONTEXT *fc,
                                               FRAME_COUNTS *counts,
                                               PREDICTION_MODE mode,
                                               int16_t mode_context) {
  (void)counts;

  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][0];
#endif
    update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
    return;
  }

#if CONFIG_ENTROPY_STATS
  ++counts->newmv_mode[mode_ctx][1];
#endif
  update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);

  mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->zeromv_mode[mode_ctx][0];
#endif
    update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
    return;
  }

#if CONFIG_ENTROPY_STATS
  ++counts->zeromv_mode[mode_ctx][1];
#endif
  update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);

  mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
#endif
  update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
}
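
// Editorial sketch of the signalling tree handled above: the inter mode is
// coded as a cascade of binary decisions, each with its own context slice of
// mode_context and its own CDF:
//   NEWMV?                   -> newmv_cdf[mode_context & NEWMV_CTX_MASK]
//   else GLOBALMV?           -> zeromv_cdf[(mode_context >> GLOBALMV_OFFSET)
//                                          & GLOBALMV_CTX_MASK]
//   else NEARESTMV vs NEARMV -> refmv_cdf[(mode_context >> REFMV_OFFSET)
//                                         & REFMV_CTX_MASK]
// Only the branches actually reached update their CDFs, mirroring what the
// decoder will read.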

static AOM_INLINE void update_palette_cdf(MACROBLOCKD *xd,
                                          const MB_MODE_INFO *const mbmi,
                                          FRAME_COUNTS *counts) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);

  (void)counts;

  if (mbmi->mode == DC_PRED) {
    const int n = pmi->palette_size[0];
    const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
#endif
    update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
               n > 0, 2);
    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
                 n - PALETTE_MIN_SIZE, PALETTE_SIZES);
    }
  }

  if (mbmi->uv_mode == UV_DC_PRED) {
    const int n = pmi->palette_size[1];
    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
#endif
    update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);

    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
                 n - PALETTE_MIN_SIZE, PALETTE_SIZES);
    }
  }
}

static AOM_INLINE void sum_intra_stats(const AV1_COMMON *const cm,
                                       FRAME_COUNTS *counts, MACROBLOCKD *xd,
                                       const MB_MODE_INFO *const mbmi,
                                       const MB_MODE_INFO *above_mi,
                                       const MB_MODE_INFO *left_mi,
                                       const int intraonly) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const PREDICTION_MODE y_mode = mbmi->mode;
  (void)counts;
  const BLOCK_SIZE bsize = mbmi->sb_type;

  if (intraonly) {
#if CONFIG_ENTROPY_STATS
    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
    const int above_ctx = intra_mode_context[above];
    const int left_ctx = intra_mode_context[left];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
  }

  if (av1_filter_intra_allowed(cm, mbmi)) {
    const int use_filter_intra_mode =
        mbmi->filter_intra_mode_info.use_filter_intra;
#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
    if (use_filter_intra_mode) {
      ++counts
            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
    }
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode, 2);
    if (use_filter_intra_mode) {
      update_cdf(fc->filter_intra_mode_cdf,
                 mbmi->filter_intra_mode_info.filter_intra_mode,
                 FILTER_INTRA_MODES);
    }
  }
  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[mbmi->mode - V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
#endif
    update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
               mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,
               2 * MAX_ANGLE_DELTA + 1);
  }

  if (!xd->is_chroma_ref) return;

  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
  const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[cfl_allowed][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
             UV_INTRA_MODES - !cfl_allowed);
  if (uv_mode == UV_CFL_PRED) {
    const int8_t joint_sign = mbmi->cfl_alpha_signs;
    const uint8_t idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[joint_sign];
#endif
    update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
#endif
      update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
    }
    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
#endif
      update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
    }
  }
  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_mode - UV_V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
#endif
    update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],
               mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
               2 * MAX_ANGLE_DELTA + 1);
  }
  if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
    update_palette_cdf(xd, mbmi, counts);
  }
}

static AOM_INLINE void update_stats(const AV1_COMMON *const cm,
                                    ThreadData *td) {
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const int seg_ref_active =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);

  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
      is_comp_ref_allowed(bsize)) {
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
#endif
    update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
  }

  if (!mbmi->skip_mode && !seg_ref_active) {
    const int skip_ctx = av1_get_skip_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip[skip_ctx][mbmi->skip]++;
#endif
    update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
  }

#if CONFIG_ENTROPY_STATS
  // delta quant applies to both intra and inter
  const int super_block_upper_left =
      ((xd->mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
      ((xd->mi_col & (cm->seq_params.mib_size - 1)) == 0);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  if (delta_q_info->delta_q_present_flag &&
      (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
      super_block_upper_left) {
    const int dq =
        (mbmi->current_qindex - xd->current_qindex) / delta_q_info->delta_q_res;
    const int absdq = abs(dq);
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
      td->counts->delta_q[i][1]++;
    }
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
    if (delta_q_info->delta_lf_present_flag) {
      if (delta_q_info->delta_lf_multi) {
        const int frame_lf_count =
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
                               delta_q_info->delta_lf_res;
          const int abs_delta_lf = abs(delta_lf);
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
            td->counts->delta_lf_multi[lf_id][i][1]++;
          }
          if (abs_delta_lf < DELTA_LF_SMALL)
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
        }
      } else {
        const int delta_lf =
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
            delta_q_info->delta_lf_res;
        const int abs_delta_lf = abs(delta_lf);
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
          td->counts->delta_lf[i][1]++;
        }
        if (abs_delta_lf < DELTA_LF_SMALL)
          td->counts->delta_lf[abs_delta_lf][0]++;
      }
    }
  }
#endif

  if (!is_inter_block(mbmi)) {
    sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
                    frame_is_intra_only(cm));
  }

  if (av1_allow_intrabc(cm)) {
    update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
#if CONFIG_ENTROPY_STATS
    ++td->counts->intrabc[is_intrabc_block(mbmi)];
#endif  // CONFIG_ENTROPY_STATS
  }

  if (frame_is_intra_only(cm) || mbmi->skip_mode) return;

  FRAME_COUNTS *const counts = td->counts;
  const int inter_block = is_inter_block(mbmi);

  if (!seg_ref_active) {
#if CONFIG_ENTROPY_STATS
    counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
#endif
    update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
               inter_block, 2);
    // If the segment reference feature is enabled, only a single reference
    // frame is allowed for the segment, so exclude it from the reference
    // frame counts used to work out probabilities.
    if (inter_block) {
      const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
      const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
        if (is_comp_ref_allowed(bsize)) {
#if CONFIG_ENTROPY_STATS
          counts->comp_inter[av1_get_reference_mode_context(xd)]
                            [has_second_ref(mbmi)]++;
#endif  // CONFIG_ENTROPY_STATS
          update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
        }
      }

      if (has_second_ref(mbmi)) {
        const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
                                                      ? UNIDIR_COMP_REFERENCE
                                                      : BIDIR_COMP_REFERENCE;
        update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
                   COMP_REFERENCE_TYPES);
#if CONFIG_ENTROPY_STATS
        counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
                             [comp_ref_type]++;
#endif  // CONFIG_ENTROPY_STATS

        if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
          const int bit = (ref0 == BWDREF_FRAME);
          update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts
              ->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit) {
            const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
            update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
                                [bit1]++;
#endif  // CONFIG_ENTROPY_STATS
            if (bit1) {
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
                         ref1 == GOLDEN_FRAME, 2);
#if CONFIG_ENTROPY_STATS
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)][2]
                                  [ref1 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        } else {
          const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
          update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit) {
            update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), ref0 == LAST2_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
                            [ref0 == LAST2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          } else {
            update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), ref0 == GOLDEN_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
                            [ref0 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
          update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), ref1 == ALTREF_FRAME,
                     2);
#if CONFIG_ENTROPY_STATS
          counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
                             [ref1 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          if (ref1 != ALTREF_FRAME) {
            update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
                       ref1 == ALTREF2_FRAME, 2);
#if CONFIG_ENTROPY_STATS
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
                               [ref1 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
        }
      } else {
        const int bit = (ref0 >= BWDREF_FRAME);
        update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
        counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
        if (bit) {
          assert(ref0 <= ALTREF_FRAME);
          update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME,
                     2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
                            [ref0 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          if (ref0 != ALTREF_FRAME) {
            update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
                       ref0 == ALTREF2_FRAME, 2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
                              [ref0 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          }
        } else {
          const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
          update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
#endif  // CONFIG_ENTROPY_STATS
          if (!bit1) {
            update_cdf(av1_get_pred_cdf_single_ref_p4(xd), ref0 != LAST_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
                              [ref0 != LAST_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
          } else {
            update_cdf(av1_get_pred_cdf_single_ref_p5(xd), ref0 != LAST3_FRAME,
                       2);
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
1306                               [ref0 != LAST3_FRAME]++;
1307 #endif  // CONFIG_ENTROPY_STATS
1308           }
1309         }
1310       }
1311 
1312       if (cm->seq_params.enable_interintra_compound &&
1313           is_interintra_allowed(mbmi)) {
1314         const int bsize_group = size_group_lookup[bsize];
1315         if (mbmi->ref_frame[1] == INTRA_FRAME) {
1316 #if CONFIG_ENTROPY_STATS
1317           counts->interintra[bsize_group][1]++;
1318 #endif
1319           update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
1320 #if CONFIG_ENTROPY_STATS
1321           counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
1322 #endif
1323           update_cdf(fc->interintra_mode_cdf[bsize_group],
1324                      mbmi->interintra_mode, INTERINTRA_MODES);
1325           if (av1_is_wedge_used(bsize)) {
1326 #if CONFIG_ENTROPY_STATS
1327             counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
1328 #endif
1329             update_cdf(fc->wedge_interintra_cdf[bsize],
1330                        mbmi->use_wedge_interintra, 2);
1331             if (mbmi->use_wedge_interintra) {
1332 #if CONFIG_ENTROPY_STATS
1333               counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
1334 #endif
1335               update_cdf(fc->wedge_idx_cdf[bsize], mbmi->interintra_wedge_index,
1336                          16);
1337             }
1338           }
1339         } else {
1340 #if CONFIG_ENTROPY_STATS
1341           counts->interintra[bsize_group][0]++;
1342 #endif
1343           update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
1344         }
1345       }
1346 
1347       const MOTION_MODE motion_allowed =
1348           cm->features.switchable_motion_mode
1349               ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1350                                     cm->features.allow_warped_motion)
1351               : SIMPLE_TRANSLATION;
1352       if (mbmi->ref_frame[1] != INTRA_FRAME) {
1353         if (motion_allowed == WARPED_CAUSAL) {
1354 #if CONFIG_ENTROPY_STATS
1355           counts->motion_mode[bsize][mbmi->motion_mode]++;
1356 #endif
1357           update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
1358                      MOTION_MODES);
1359         } else if (motion_allowed == OBMC_CAUSAL) {
1360 #if CONFIG_ENTROPY_STATS
1361           counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1362 #endif
1363           update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, 2);
1364         }
1365       }
1366 
1367       if (has_second_ref(mbmi)) {
1368         assert(current_frame->reference_mode != SINGLE_REFERENCE &&
1369                is_inter_compound_mode(mbmi->mode) &&
1370                mbmi->motion_mode == SIMPLE_TRANSLATION);
1371 
1372         const int masked_compound_used = is_any_masked_compound_used(bsize) &&
1373                                          cm->seq_params.enable_masked_compound;
1374         if (masked_compound_used) {
1375           const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
1376 #if CONFIG_ENTROPY_STATS
1377           ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
1378 #endif
1379           update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
1380                      mbmi->comp_group_idx, 2);
1381         }
1382 
1383         if (mbmi->comp_group_idx == 0) {
1384           const int comp_index_ctx = get_comp_index_context(cm, xd);
1385 #if CONFIG_ENTROPY_STATS
1386           ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
1387 #endif
1388           update_cdf(fc->compound_index_cdf[comp_index_ctx], mbmi->compound_idx,
1389                      2);
1390         } else {
1391           assert(masked_compound_used);
1392           if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1393 #if CONFIG_ENTROPY_STATS
1394             ++counts->compound_type[bsize][mbmi->interinter_comp.type -
1395                                            COMPOUND_WEDGE];
1396 #endif
1397             update_cdf(fc->compound_type_cdf[bsize],
1398                        mbmi->interinter_comp.type - COMPOUND_WEDGE,
1399                        MASKED_COMPOUND_TYPES);
1400           }
1401         }
1402       }
1403       if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
1404         if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1405 #if CONFIG_ENTROPY_STATS
1406           counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
1407 #endif
1408           update_cdf(fc->wedge_idx_cdf[bsize],
1409                      mbmi->interinter_comp.wedge_index, 16);
1410         }
1411       }
1412     }
1413   }
1414 
1415   if (inter_block && cm->features.interp_filter == SWITCHABLE &&
1416       mbmi->motion_mode != WARPED_CAUSAL &&
1417       !is_nontrans_global_motion(xd, mbmi)) {
1418     update_filter_type_cdf(xd, mbmi);
1419   }
1420   if (inter_block &&
1421       !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1422     const PREDICTION_MODE mode = mbmi->mode;
1423     const int16_t mode_ctx =
1424         av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1425     if (has_second_ref(mbmi)) {
1426 #if CONFIG_ENTROPY_STATS
1427       ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
1428 #endif
1429       update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
1430                  INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
1431     } else {
1432       update_inter_mode_stats(fc, counts, mode, mode_ctx);
1433     }
1434 
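    // Dynamic reference list (DRL) index coding: the chosen ref_mv_idx is
    // signalled with up to two context-coded "keep searching" bits, one per
    // candidate position, so the loops below stop as soon as the coded index
    // is reached.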
1435     const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
1436     if (new_mv) {
1437       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1438       for (int idx = 0; idx < 2; ++idx) {
1439         if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1440           const uint8_t drl_ctx =
1441               av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1442           update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx, 2);
1443 #if CONFIG_ENTROPY_STATS
1444           ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
1445 #endif
1446           if (mbmi->ref_mv_idx == idx) break;
1447         }
1448       }
1449     }
1450 
1451     if (have_nearmv_in_inter_mode(mbmi->mode)) {
1452       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1453       for (int idx = 1; idx < 3; ++idx) {
1454         if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1455           const uint8_t drl_ctx =
1456               av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1457           update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx - 1, 2);
1458 #if CONFIG_ENTROPY_STATS
1459           ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
1460 #endif
1461           if (mbmi->ref_mv_idx == idx - 1) break;
1462         }
1463       }
1464     }
1465     if (have_newmv_in_inter_mode(mbmi->mode)) {
1466       const int allow_hp = cm->features.cur_frame_force_integer_mv
1467                                ? MV_SUBPEL_NONE
1468                                : cm->features.allow_high_precision_mv;
1469       if (new_mv) {
1470         for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
1471           const int_mv ref_mv = av1_get_ref_mv(x, ref);
1472           av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1473                               allow_hp);
1474         }
1475       } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAR_NEWMV) {
1476         const int ref = 1;
1477         const int_mv ref_mv = av1_get_ref_mv(x, ref);
1478         av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1479                             allow_hp);
1480       } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEW_NEARMV) {
1481         const int ref = 0;
1482         const int_mv ref_mv = av1_get_ref_mv(x, ref);
1483         av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1484                             allow_hp);
1485       }
1486     }
1487   }
1488 }
1489 
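// save_context()/restore_context() snapshot and roll back the per-plane
// entropy contexts together with the partition and txfm contexts, so the
// partition search can trial-encode one candidate partitioning and then
// evaluate another from an identical starting state.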
1490 static AOM_INLINE void restore_context(MACROBLOCK *x,
1491                                        const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
1492                                        int mi_row, int mi_col, BLOCK_SIZE bsize,
1493                                        const int num_planes) {
1494   MACROBLOCKD *xd = &x->e_mbd;
1495   int p;
1496   const int num_4x4_blocks_wide = mi_size_wide[bsize];
1497   const int num_4x4_blocks_high = mi_size_high[bsize];
1498   int mi_width = mi_size_wide[bsize];
1499   int mi_height = mi_size_high[bsize];
1500   for (p = 0; p < num_planes; p++) {
1501     int tx_col = mi_col;
1502     int tx_row = mi_row & MAX_MIB_MASK;
1503     memcpy(
1504         xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x),
1505         ctx->a + num_4x4_blocks_wide * p,
1506         (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
1507             xd->plane[p].subsampling_x);
1508     memcpy(xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y),
1509            ctx->l + num_4x4_blocks_high * p,
1510            (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
1511                xd->plane[p].subsampling_y);
1512   }
1513   memcpy(xd->above_partition_context + mi_col, ctx->sa,
1514          sizeof(*xd->above_partition_context) * mi_width);
1515   memcpy(xd->left_partition_context + (mi_row & MAX_MIB_MASK), ctx->sl,
1516          sizeof(xd->left_partition_context[0]) * mi_height);
1517   xd->above_txfm_context = ctx->p_ta;
1518   xd->left_txfm_context = ctx->p_tl;
1519   memcpy(xd->above_txfm_context, ctx->ta,
1520          sizeof(*xd->above_txfm_context) * mi_width);
1521   memcpy(xd->left_txfm_context, ctx->tl,
1522          sizeof(*xd->left_txfm_context) * mi_height);
1523 }
1524 
1525 static AOM_INLINE void save_context(const MACROBLOCK *x,
1526                                     RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
1527                                     int mi_row, int mi_col, BLOCK_SIZE bsize,
1528                                     const int num_planes) {
1529   const MACROBLOCKD *xd = &x->e_mbd;
1530   int p;
1531   int mi_width = mi_size_wide[bsize];
1532   int mi_height = mi_size_high[bsize];
1533 
1534   // Buffer the above/left context information of the block being searched.
1535   for (p = 0; p < num_planes; ++p) {
1536     int tx_col = mi_col;
1537     int tx_row = mi_row & MAX_MIB_MASK;
1538     memcpy(
1539         ctx->a + mi_width * p,
1540         xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x),
1541         (sizeof(ENTROPY_CONTEXT) * mi_width) >> xd->plane[p].subsampling_x);
1542     memcpy(ctx->l + mi_height * p,
1543            xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y),
1544            (sizeof(ENTROPY_CONTEXT) * mi_height) >> xd->plane[p].subsampling_y);
1545   }
1546   memcpy(ctx->sa, xd->above_partition_context + mi_col,
1547          sizeof(*xd->above_partition_context) * mi_width);
1548   memcpy(ctx->sl, xd->left_partition_context + (mi_row & MAX_MIB_MASK),
1549          sizeof(xd->left_partition_context[0]) * mi_height);
1550   memcpy(ctx->ta, xd->above_txfm_context,
1551          sizeof(*xd->above_txfm_context) * mi_width);
1552   memcpy(ctx->tl, xd->left_txfm_context,
1553          sizeof(*xd->left_txfm_context) * mi_height);
1554   ctx->p_ta = xd->above_txfm_context;
1555   ctx->p_tl = xd->left_txfm_context;
1556 }
1557 
1558 static AOM_INLINE void encode_b(const AV1_COMP *const cpi,
1559                                 TileDataEnc *tile_data, ThreadData *td,
1560                                 TOKENEXTRA **tp, int mi_row, int mi_col,
1561                                 RUN_TYPE dry_run, BLOCK_SIZE bsize,
1562                                 PARTITION_TYPE partition,
1563                                 PICK_MODE_CONTEXT *const ctx, int *rate) {
1564   TileInfo *const tile = &tile_data->tile_info;
1565   MACROBLOCK *const x = &td->mb;
1566   MACROBLOCKD *xd = &x->e_mbd;
1567 
1568   set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
1569   const int origin_mult = x->rdmult;
1570   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1571   MB_MODE_INFO *mbmi = xd->mi[0];
1572   mbmi->partition = partition;
1573   update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
1574 
1575   if (!dry_run) {
1576     x->mbmi_ext_frame->cb_offset = x->cb_offset;
1577     assert(x->cb_offset <
1578            (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
1579   }
1580 
1581   encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
1582 
1583   if (!dry_run) {
1584     const AV1_COMMON *const cm = &cpi->common;
1585     x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
1586     if (bsize == cpi->common.seq_params.sb_size && mbmi->skip == 1 &&
1587         cm->delta_q_info.delta_lf_present_flag) {
1588       const int frame_lf_count =
1589           av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1590       for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
1591         mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
1592       mbmi->delta_lf_from_base = xd->delta_lf_from_base;
1593     }
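    // comp_group_idx == 0 selects the averaging compound (uniform or
    // distance-weighted, distinguished by compound_idx), while
    // comp_group_idx == 1 selects a masked compound (wedge or
    // difference-weighted mask).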
1594     if (has_second_ref(mbmi)) {
1595       if (mbmi->compound_idx == 0 ||
1596           mbmi->interinter_comp.type == COMPOUND_AVERAGE)
1597         mbmi->comp_group_idx = 0;
1598       else
1599         mbmi->comp_group_idx = 1;
1600     }
1601 
1602     // Delta quant applies to both intra and inter blocks.
1603     const int super_block_upper_left =
1604         ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
1605         ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
1606     const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1607     if (delta_q_info->delta_q_present_flag &&
1608         (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
1609         super_block_upper_left) {
1610       xd->current_qindex = mbmi->current_qindex;
1611       if (delta_q_info->delta_lf_present_flag) {
1612         if (delta_q_info->delta_lf_multi) {
1613           const int frame_lf_count =
1614               av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1615           for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
1616             xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
1617           }
1618         } else {
1619           xd->delta_lf_from_base = mbmi->delta_lf_from_base;
1620         }
1621       }
1622     }
1623 
1624     RD_COUNTS *rdc = &td->rd_counts;
1625     if (mbmi->skip_mode) {
1626       assert(!frame_is_intra_only(cm));
1627       rdc->skip_mode_used_flag = 1;
1628       if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1629         assert(has_second_ref(mbmi));
1630         rdc->compound_ref_used_flag = 1;
1631       }
1632       set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1633     } else {
1634       const int seg_ref_active =
1635           segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1636       if (!seg_ref_active) {
1637         // If the segment reference feature is enabled, only a single
1638         // reference frame is allowed for the segment, so exclude it from
1639         // the reference frame counts used to work out probabilities.
1640         if (is_inter_block(mbmi)) {
1641           av1_collect_neighbors_ref_counts(xd);
1642           if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1643             if (has_second_ref(mbmi)) {
1644               // This flag is also updated for 4x4 blocks
1645               rdc->compound_ref_used_flag = 1;
1646             }
1647           }
1648           set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1649         }
1650       }
1651     }
1652 
1653     if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
1654 
1655     // Gather OBMC and warped-motion counts to update the probabilities.
1656     if ((!cpi->sf.inter_sf.disable_obmc &&
1657          cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) ||
1658         (cm->features.allow_warped_motion &&
1659          cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) {
1660       const int inter_block = is_inter_block(mbmi);
1661       const int seg_ref_active =
1662           segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1663       if (!seg_ref_active && inter_block) {
1664         const MOTION_MODE motion_allowed =
1665             cm->features.switchable_motion_mode
1666                 ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1667                                       cm->features.allow_warped_motion)
1668                 : SIMPLE_TRANSLATION;
1669 
1670         if (mbmi->ref_frame[1] != INTRA_FRAME) {
1671           if (motion_allowed >= OBMC_CAUSAL) {
1672             td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1673           }
1674           if (motion_allowed == WARPED_CAUSAL) {
1675             td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
1676           }
1677         }
1678       }
1679     }
1680   }
1681   // TODO(Ravi/Remya): Move this copy function to a better logical place
1682   // This function will copy the best mode information from block
1683   // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
1684   // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
1685   // bitstream preparation.
1686   av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, x->mbmi_ext,
1687                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
1688   x->rdmult = origin_mult;
1689 }
1690 
1691 static AOM_INLINE void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
1692                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
1693                                  int mi_row, int mi_col, RUN_TYPE dry_run,
1694                                  BLOCK_SIZE bsize, PC_TREE *pc_tree,
1695                                  int *rate) {
1696   assert(bsize < BLOCK_SIZES_ALL);
1697   const AV1_COMMON *const cm = &cpi->common;
1698   const CommonModeInfoParams *const mi_params = &cm->mi_params;
1699   MACROBLOCK *const x = &td->mb;
1700   MACROBLOCKD *const xd = &x->e_mbd;
1702   const int hbs = mi_size_wide[bsize] / 2;
1703   const int is_partition_root = bsize >= BLOCK_8X8;
1704   const int ctx = is_partition_root
1705                       ? partition_plane_context(xd, mi_row, mi_col, bsize)
1706                       : -1;
1707   const PARTITION_TYPE partition = pc_tree->partitioning;
1708   const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1709   int quarter_step = mi_size_wide[bsize] / 4;
1710   int i;
1711   BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
1712 
1713   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1714 
1715   if (!dry_run && ctx >= 0) {
1716     const int has_rows = (mi_row + hbs) < mi_params->mi_rows;
1717     const int has_cols = (mi_col + hbs) < mi_params->mi_cols;
1718 
1719     if (has_rows && has_cols) {
1720 #if CONFIG_ENTROPY_STATS
1721       td->counts->partition[ctx][partition]++;
1722 #endif
1723 
1724       if (tile_data->allow_update_cdf) {
1725         FRAME_CONTEXT *fc = xd->tile_ctx;
1726         update_cdf(fc->partition_cdf[ctx], partition,
1727                    partition_cdf_length(bsize));
1728       }
1729     }
1730   }
1731 
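  // Sub-block geometry of the extended partitions (hbs = half the block size
  // in mi units): HORZ_A places two half-size squares above one full-width
  // rectangle, HORZ_B is its mirror image; VERT_A/VERT_B are the transposed
  // forms; HORZ_4/VERT_4 split the block into four thin strips.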
1732   switch (partition) {
1733     case PARTITION_NONE:
1734       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1735                partition, &pc_tree->none, rate);
1736       break;
1737     case PARTITION_VERT:
1738       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1739                partition, &pc_tree->vertical[0], rate);
1740       if (mi_col + hbs < mi_params->mi_cols) {
1741         encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1742                  partition, &pc_tree->vertical[1], rate);
1743       }
1744       break;
1745     case PARTITION_HORZ:
1746       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1747                partition, &pc_tree->horizontal[0], rate);
1748       if (mi_row + hbs < mi_params->mi_rows) {
1749         encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1750                  partition, &pc_tree->horizontal[1], rate);
1751       }
1752       break;
1753     case PARTITION_SPLIT:
1754       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
1755                 pc_tree->split[0], rate);
1756       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
1757                 pc_tree->split[1], rate);
1758       encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
1759                 pc_tree->split[2], rate);
1760       encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
1761                 subsize, pc_tree->split[3], rate);
1762       break;
1763 
1764     case PARTITION_HORZ_A:
1765       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1766                partition, &pc_tree->horizontala[0], rate);
1767       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1768                partition, &pc_tree->horizontala[1], rate);
1769       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1770                partition, &pc_tree->horizontala[2], rate);
1771       break;
1772     case PARTITION_HORZ_B:
1773       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1774                partition, &pc_tree->horizontalb[0], rate);
1775       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1776                partition, &pc_tree->horizontalb[1], rate);
1777       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1778                bsize2, partition, &pc_tree->horizontalb[2], rate);
1779       break;
1780     case PARTITION_VERT_A:
1781       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1782                partition, &pc_tree->verticala[0], rate);
1783       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1784                partition, &pc_tree->verticala[1], rate);
1785       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1786                partition, &pc_tree->verticala[2], rate);
1787 
1788       break;
1789     case PARTITION_VERT_B:
1790       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1791                partition, &pc_tree->verticalb[0], rate);
1792       encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1793                partition, &pc_tree->verticalb[1], rate);
1794       encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1795                bsize2, partition, &pc_tree->verticalb[2], rate);
1796       break;
1797     case PARTITION_HORZ_4:
1798       for (i = 0; i < 4; ++i) {
1799         int this_mi_row = mi_row + i * quarter_step;
1800         if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
1801 
1802         encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
1803                  partition, &pc_tree->horizontal4[i], rate);
1804       }
1805       break;
1806     case PARTITION_VERT_4:
1807       for (i = 0; i < 4; ++i) {
1808         int this_mi_col = mi_col + i * quarter_step;
1809         if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
1810         encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
1811                  partition, &pc_tree->vertical4[i], rate);
1812       }
1813       break;
1814     default: assert(0 && "Invalid partition type."); break;
1815   }
1816 
1817   update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
1818 }
1819 
1820 static AOM_INLINE void set_partial_sb_partition(
1821     const AV1_COMMON *const cm, MB_MODE_INFO *mi, int bh_in, int bw_in,
1822     int mi_rows_remaining, int mi_cols_remaining, BLOCK_SIZE bsize,
1823     MB_MODE_INFO **mib) {
1824   int bh = bh_in;
1825   int r, c;
1826   for (r = 0; r < cm->seq_params.mib_size; r += bh) {
1827     int bw = bw_in;
1828     for (c = 0; c < cm->seq_params.mib_size; c += bw) {
1829       const int grid_index = get_mi_grid_idx(&cm->mi_params, r, c);
1830       const int mi_index = get_alloc_mi_idx(&cm->mi_params, r, c);
1831       mib[grid_index] = mi + mi_index;
1832       mib[grid_index]->sb_type = find_partition_size(
1833           bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw);
1834     }
1835   }
1836 }
1837 
1838 // This function attempts to set all mode info entries in a given superblock
1839 // to the same block partition size.
1840 // However, at the bottom and right borders of the image the requested size
1841 // may not be allowed, in which case this code chooses the largest
1842 // allowable partition.
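// For example, if bsize is 64x64 but only a 64x32 region of the superblock
// remains inside the image, find_partition_size() is expected to assign the
// largest block size that still fits the remaining rows and columns.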
1843 static AOM_INLINE void set_fixed_partitioning(AV1_COMP *cpi,
1844                                               const TileInfo *const tile,
1845                                               MB_MODE_INFO **mib, int mi_row,
1846                                               int mi_col, BLOCK_SIZE bsize) {
1847   AV1_COMMON *const cm = &cpi->common;
1848   const CommonModeInfoParams *const mi_params = &cm->mi_params;
1849   const int mi_rows_remaining = tile->mi_row_end - mi_row;
1850   const int mi_cols_remaining = tile->mi_col_end - mi_col;
1851   MB_MODE_INFO *const mi_upper_left =
1852       mi_params->mi_alloc + get_alloc_mi_idx(mi_params, mi_row, mi_col);
1853   int bh = mi_size_high[bsize];
1854   int bw = mi_size_wide[bsize];
1855 
1856   assert(bsize >= mi_params->mi_alloc_bsize &&
1857          "Attempted to use bsize < mi_params->mi_alloc_bsize");
1858   assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
1859 
1860   // Apply the requested partition size to the SB if it is all "in image"
1861   if ((mi_cols_remaining >= cm->seq_params.mib_size) &&
1862       (mi_rows_remaining >= cm->seq_params.mib_size)) {
1863     for (int block_row = 0; block_row < cm->seq_params.mib_size;
1864          block_row += bh) {
1865       for (int block_col = 0; block_col < cm->seq_params.mib_size;
1866            block_col += bw) {
1867         const int grid_index = get_mi_grid_idx(mi_params, block_row, block_col);
1868         const int mi_index = get_alloc_mi_idx(mi_params, block_row, block_col);
1869         mib[grid_index] = mi_upper_left + mi_index;
1870         mib[grid_index]->sb_type = bsize;
1871       }
1872     }
1873   } else {
1874     // Else this is a partial SB.
1875     set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
1876                              mi_cols_remaining, bsize, mib);
1877   }
1878 }
1879 
1880 static AOM_INLINE void rd_use_partition(
1881     AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MB_MODE_INFO **mib,
1882     TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate,
1883     int64_t *dist, int do_recon, PC_TREE *pc_tree) {
1884   AV1_COMMON *const cm = &cpi->common;
1885   const CommonModeInfoParams *const mi_params = &cm->mi_params;
1886   const int num_planes = av1_num_planes(cm);
1887   TileInfo *const tile_info = &tile_data->tile_info;
1888   MACROBLOCK *const x = &td->mb;
1889   MACROBLOCKD *const xd = &x->e_mbd;
1890   const int bs = mi_size_wide[bsize];
1891   const int hbs = bs / 2;
1892   int i;
1893   const int pl = (bsize >= BLOCK_8X8)
1894                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
1895                      : 0;
1896   const PARTITION_TYPE partition =
1897       (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1898                            : PARTITION_NONE;
1899   const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1900   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1901   RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc;
1902   BLOCK_SIZE sub_subsize = BLOCK_4X4;
1903   int splits_below = 0;
1904   BLOCK_SIZE bs_type = mib[0]->sb_type;
1905   PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1906 
1907   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1908 
1909   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1910 
1911   av1_invalid_rd_stats(&last_part_rdc);
1912   av1_invalid_rd_stats(&none_rdc);
1913   av1_invalid_rd_stats(&chosen_rdc);
1914   av1_invalid_rd_stats(&invalid_rdc);
1915 
1916   pc_tree->partitioning = partition;
1917 
1918   xd->above_txfm_context =
1919       cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
1920   xd->left_txfm_context =
1921       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1922   save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1923 
1924   if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1925     set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1926     x->mb_energy = av1_log_block_var(cpi, x, bsize);
1927   }
1928 
1929   // Save rdmult before it might be changed, so it can be restored later.
1930   const int orig_rdmult = x->rdmult;
1931   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1932 
1933   if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
1934       (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 2 ||
1935        (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 1 &&
1936         cm->quant_params.base_qindex > 190 && bsize <= BLOCK_32X32 &&
1937         !frame_is_intra_only(cm)))) {
1938     // Check if any of the sub blocks are further split.
1939     if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
1940       sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
1941       splits_below = 1;
1942       for (i = 0; i < 4; i++) {
1943         int jj = i >> 1, ii = i & 0x01;
1944         MB_MODE_INFO *this_mi = mib[jj * hbs * mi_params->mi_stride + ii * hbs];
1945         if (this_mi && this_mi->sb_type >= sub_subsize) {
1946           splits_below = 0;
1947         }
1948       }
1949     }
1950 
1951     // If the partition is not PARTITION_NONE, also try PARTITION_NONE
1952     // unless each of the 4 sub-blocks is split even further.
1953     if (partition != PARTITION_NONE && !splits_below &&
1954         mi_row + hbs < mi_params->mi_rows &&
1955         mi_col + hbs < mi_params->mi_cols) {
1956       pc_tree->partitioning = PARTITION_NONE;
1957       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
1958                     PARTITION_NONE, bsize, ctx_none, invalid_rdc, PICK_MODE_RD);
1959 
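      // RDCOST() forms the usual Lagrangian cost (a fixed-point rdmult * rate
      // term plus scaled distortion), so the partition-type rate is added
      // first to let PARTITION_NONE compete fairly with the stored partition.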
1960       if (none_rdc.rate < INT_MAX) {
1961         none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
1962         none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
1963       }
1964 
1965       restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1966       mib[0]->sb_type = bs_type;
1967       pc_tree->partitioning = partition;
1968     }
1969   }
1970 
1971   switch (partition) {
1972     case PARTITION_NONE:
1973       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1974                     PARTITION_NONE, bsize, ctx_none, invalid_rdc, PICK_MODE_RD);
1975       break;
1976     case PARTITION_HORZ:
1977       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1978                     PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
1979                     invalid_rdc, PICK_MODE_RD);
1980       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1981           mi_row + hbs < mi_params->mi_rows) {
1982         RD_STATS tmp_rdc;
1983         const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
1984         av1_init_rd_stats(&tmp_rdc);
1985         update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
1986         encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1987                           NULL);
1988         pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
1989                       PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
1990                       invalid_rdc, PICK_MODE_RD);
1991         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1992           av1_invalid_rd_stats(&last_part_rdc);
1993           break;
1994         }
1995         last_part_rdc.rate += tmp_rdc.rate;
1996         last_part_rdc.dist += tmp_rdc.dist;
1997         last_part_rdc.rdcost += tmp_rdc.rdcost;
1998       }
1999       break;
2000     case PARTITION_VERT:
2001       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2002                     PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rdc,
2003                     PICK_MODE_RD);
2004       if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2005           mi_col + hbs < mi_params->mi_cols) {
2006         RD_STATS tmp_rdc;
2007         const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
2008         av1_init_rd_stats(&tmp_rdc);
2009         update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
2010         encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
2011                           NULL);
2012         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
2013                       PARTITION_VERT, subsize,
2014                       &pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc,
2015                       PICK_MODE_RD);
2016         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2017           av1_invalid_rd_stats(&last_part_rdc);
2018           break;
2019         }
2020         last_part_rdc.rate += tmp_rdc.rate;
2021         last_part_rdc.dist += tmp_rdc.dist;
2022         last_part_rdc.rdcost += tmp_rdc.rdcost;
2023       }
2024       break;
2025     case PARTITION_SPLIT:
2026       if (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 1 &&
2027           none_rdc.rate < INT_MAX && none_rdc.skip == 1) {
2028         av1_invalid_rd_stats(&last_part_rdc);
2029         break;
2030       }
2031       last_part_rdc.rate = 0;
2032       last_part_rdc.dist = 0;
2033       last_part_rdc.rdcost = 0;
2034       for (i = 0; i < 4; i++) {
2035         int x_idx = (i & 1) * hbs;
2036         int y_idx = (i >> 1) * hbs;
2037         int jj = i >> 1, ii = i & 0x01;
2038         RD_STATS tmp_rdc;
2039         if ((mi_row + y_idx >= mi_params->mi_rows) ||
2040             (mi_col + x_idx >= mi_params->mi_cols))
2041           continue;
2042 
2043         av1_init_rd_stats(&tmp_rdc);
2044         rd_use_partition(cpi, td, tile_data,
2045                          mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
2046                          mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
2047                          &tmp_rdc.dist, i != 3, pc_tree->split[i]);
2048         if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2049           av1_invalid_rd_stats(&last_part_rdc);
2050           break;
2051         }
2052         last_part_rdc.rate += tmp_rdc.rate;
2053         last_part_rdc.dist += tmp_rdc.dist;
2054       }
2055       break;
2056     case PARTITION_VERT_A:
2057     case PARTITION_VERT_B:
2058     case PARTITION_HORZ_A:
2059     case PARTITION_HORZ_B:
2060     case PARTITION_HORZ_4:
2061     case PARTITION_VERT_4:
2062       assert(0 && "Cannot handle extended partition types");
2063     default: assert(0); break;
2064   }
2065 
2066   if (last_part_rdc.rate < INT_MAX) {
2067     last_part_rdc.rate += x->partition_cost[pl][partition];
2068     last_part_rdc.rdcost =
2069         RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
2070   }
2071 
2072   if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
2073        cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) &&
2074       partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
2075       (mi_row + bs < mi_params->mi_rows ||
2076        mi_row + hbs == mi_params->mi_rows) &&
2077       (mi_col + bs < mi_params->mi_cols ||
2078        mi_col + hbs == mi_params->mi_cols)) {
2079     BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2080     chosen_rdc.rate = 0;
2081     chosen_rdc.dist = 0;
2082 
2083     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2084     pc_tree->partitioning = PARTITION_SPLIT;
2085 
2086     // Split partition.
2087     for (i = 0; i < 4; i++) {
2088       int x_idx = (i & 1) * hbs;
2089       int y_idx = (i >> 1) * hbs;
2090       RD_STATS tmp_rdc;
2091 
2092       if ((mi_row + y_idx >= mi_params->mi_rows) ||
2093           (mi_col + x_idx >= mi_params->mi_cols))
2094         continue;
2095 
2096       save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2097       pc_tree->split[i]->partitioning = PARTITION_NONE;
2098       pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
2099                     PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
2100                     invalid_rdc, PICK_MODE_RD);
2101 
2102       restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2103       if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2104         av1_invalid_rd_stats(&chosen_rdc);
2105         break;
2106       }
2107 
2108       chosen_rdc.rate += tmp_rdc.rate;
2109       chosen_rdc.dist += tmp_rdc.dist;
2110 
2111       if (i != 3)
2112         encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
2113                   OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
2114 
2115       chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2116     }
2117     if (chosen_rdc.rate < INT_MAX) {
2118       chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2119       chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
2120     }
2121   }
2122 
2123   // If last_part is better, set the partitioning to that.
2124   if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
2125     mib[0]->sb_type = bsize;
2126     if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
2127     chosen_rdc = last_part_rdc;
2128   }
2129   // If none was better, set the partitioning to that.
2130   if (none_rdc.rdcost < chosen_rdc.rdcost) {
2131     if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2132     chosen_rdc = none_rdc;
2133   }
2134 
2135   restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2136 
2137   // We must have chosen a partitioning and encoding by this point, or
2138   // we'll fail later on. There are no other opportunities for success.
2139   if (bsize == cm->seq_params.sb_size)
2140     assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2141 
2142   if (do_recon) {
2143     if (bsize == cm->seq_params.sb_size) {
2144       // NOTE: To get estimate for rate due to the tokens, use:
2145       // int rate_coeffs = 0;
2146       // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
2147       //           bsize, pc_tree, &rate_coeffs);
2148       x->cb_offset = 0;
2149       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
2150                 pc_tree, NULL);
2151     } else {
2152       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2153                 pc_tree, NULL);
2154     }
2155   }
2156 
2157   *rate = chosen_rdc.rate;
2158   *dist = chosen_rdc.dist;
2159   x->rdmult = orig_rdmult;
2160 }
2161 
2162 static int is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col,
2163                                    BLOCK_SIZE bsize) {
2164   const int bs = mi_size_wide[bsize];
2165   const int hbs = bs / 2;
2166   assert(bsize >= BLOCK_8X8);
2167   const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2168 
2169   for (int i = 0; i < 4; i++) {
2170     int x_idx = (i & 1) * hbs;
2171     int y_idx = (i >> 1) * hbs;
2172     if ((mi_row + y_idx >= cm->mi_params.mi_rows) ||
2173         (mi_col + x_idx >= cm->mi_params.mi_cols))
2174       return 0;
2175     if (get_partition(cm, mi_row + y_idx, mi_col + x_idx, subsize) !=
2176             PARTITION_NONE &&
2177         subsize != BLOCK_8X8)
2178       return 0;
2179   }
2180   return 1;
2181 }
2182 
2183 static AOM_INLINE int do_split_check(BLOCK_SIZE bsize) {
2184   return (bsize == BLOCK_16X16 || bsize == BLOCK_32X32);
2185 }
2186 
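// nonrd_use_partition() encodes a partitioning that has already been decided
// (e.g. by variance-based partitioning) using the cheaper non-RD mode search,
// optionally re-checking none-vs-split decisions for small block sizes.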
2187 static AOM_INLINE void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
2188                                            TileDataEnc *tile_data,
2189                                            MB_MODE_INFO **mib, TOKENEXTRA **tp,
2190                                            int mi_row, int mi_col,
2191                                            BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2192   AV1_COMMON *const cm = &cpi->common;
2193   const CommonModeInfoParams *const mi_params = &cm->mi_params;
2194   TileInfo *const tile_info = &tile_data->tile_info;
2195   MACROBLOCK *const x = &td->mb;
2196   MACROBLOCKD *const xd = &x->e_mbd;
2197   // Only square blocks from 8x8 to 128x128 are supported
2198   assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128);
2199   const int bs = mi_size_wide[bsize];
2200   const int hbs = bs / 2;
2201   const PARTITION_TYPE partition =
2202       (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
2203                            : PARTITION_NONE;
2204   BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2205   assert(subsize <= BLOCK_LARGEST);
2206   const int pl = (bsize >= BLOCK_8X8)
2207                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
2208                      : 0;
2209 
2210   RD_STATS dummy_cost;
2211   av1_invalid_rd_stats(&dummy_cost);
2212   RD_STATS invalid_rd;
2213   av1_invalid_rd_stats(&invalid_rd);
2214 
2215   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
2216 
2217   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2218 
2219   pc_tree->partitioning = partition;
2220 
2221   xd->above_txfm_context =
2222       cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2223   xd->left_txfm_context =
2224       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2225 
2226   switch (partition) {
2227     case PARTITION_NONE:
2228       if (cpi->sf.rt_sf.nonrd_check_partition_split && do_split_check(bsize) &&
2229           !frame_is_intra_only(cm)) {
2230         RD_STATS split_rdc, none_rdc, block_rdc;
2231         RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2232 
2233         av1_init_rd_stats(&split_rdc);
2234         av1_invalid_rd_stats(&none_rdc);
2235 
2236         save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2237         subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2238         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
2239                       PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2240                       PICK_MODE_NONRD);
2241         none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2242         none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2243         restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2244 
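        // Trial-encode the four sub-blocks with dry_run = 1 to accumulate the
        // cost of PARTITION_SPLIT; whichever of none/split is cheaper is then
        // encoded for real (dry_run = 0) below.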
2245         for (int i = 0; i < 4; i++) {
2246           av1_invalid_rd_stats(&block_rdc);
2247           const int x_idx = (i & 1) * hbs;
2248           const int y_idx = (i >> 1) * hbs;
2249           if (mi_row + y_idx >= mi_params->mi_rows ||
2250               mi_col + x_idx >= mi_params->mi_cols)
2251             continue;
2252           xd->above_txfm_context =
2253               cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2254           xd->left_txfm_context =
2255               xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
2256           pc_tree->split[i]->partitioning = PARTITION_NONE;
2257           pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2258                         &block_rdc, PARTITION_NONE, subsize,
2259                         &pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
2260           split_rdc.rate += block_rdc.rate;
2261           split_rdc.dist += block_rdc.dist;
2262 
2263           encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
2264                    subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2265         }
2266         split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2267         split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2268         restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2269 
2270         if (none_rdc.rdcost < split_rdc.rdcost) {
2271           mib[0]->sb_type = bsize;
2272           pc_tree->partitioning = PARTITION_NONE;
2273           encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2274                    &pc_tree->none, NULL);
2275         } else {
2276           mib[0]->sb_type = subsize;
2277           pc_tree->partitioning = PARTITION_SPLIT;
2278           for (int i = 0; i < 4; i++) {
2279             const int x_idx = (i & 1) * hbs;
2280             const int y_idx = (i >> 1) * hbs;
2281             if (mi_row + y_idx >= mi_params->mi_rows ||
2282                 mi_col + x_idx >= mi_params->mi_cols)
2283               continue;
2284 
2285             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
2286                      subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2287           }
2288         }
2289 
2290       } else {
2291         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2292                       PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2293                       PICK_MODE_NONRD);
2294         encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2295                  &pc_tree->none, NULL);
2296       }
2297       break;
2298     case PARTITION_VERT:
2299       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2300                     PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rd,
2301                     PICK_MODE_NONRD);
2302       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2303                PARTITION_VERT, &pc_tree->vertical[0], NULL);
2304       if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
2305         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &dummy_cost,
2306                       PARTITION_VERT, subsize, &pc_tree->vertical[1],
2307                       invalid_rd, PICK_MODE_NONRD);
2308         encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
2309                  PARTITION_VERT, &pc_tree->vertical[1], NULL);
2310       }
2311       break;
2312     case PARTITION_HORZ:
2313       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2314                     PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
2315                     invalid_rd, PICK_MODE_NONRD);
2316       encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2317                PARTITION_HORZ, &pc_tree->horizontal[0], NULL);
2318 
2319       if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
2320         pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &dummy_cost,
2321                       PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
2322                       invalid_rd, PICK_MODE_NONRD);
2323         encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
2324                  PARTITION_HORZ, &pc_tree->horizontal[1], NULL);
2325       }
2326       break;
2327     case PARTITION_SPLIT:
2328       if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
2329           is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
2330           !frame_is_intra_only(cm) && bsize <= BLOCK_32X32) {
2331         RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2332         RD_STATS split_rdc, none_rdc;
2333         av1_invalid_rd_stats(&split_rdc);
2334         av1_invalid_rd_stats(&none_rdc);
2335         save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2336         xd->above_txfm_context =
2337             cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2338         xd->left_txfm_context =
2339             xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2340         pc_tree->partitioning = PARTITION_NONE;
2341         pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
2342                       PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2343                       PICK_MODE_NONRD);
2344         none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2345         none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2346         restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2347         if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode != 2 ||
2348             none_rdc.skip != 1 || pc_tree->none.mic.mode == NEWMV) {
2349           av1_init_rd_stats(&split_rdc);
2350           for (int i = 0; i < 4; i++) {
2351             RD_STATS block_rdc;
2352             av1_invalid_rd_stats(&block_rdc);
2353             int x_idx = (i & 1) * hbs;
2354             int y_idx = (i >> 1) * hbs;
2355             if ((mi_row + y_idx >= mi_params->mi_rows) ||
2356                 (mi_col + x_idx >= mi_params->mi_cols))
2357               continue;
2358             xd->above_txfm_context =
2359                 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2360             xd->left_txfm_context = xd->left_txfm_context_buffer +
2361                                     ((mi_row + y_idx) & MAX_MIB_MASK);
2362             pc_tree->split[i]->partitioning = PARTITION_NONE;
2363             pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2364                           &block_rdc, PARTITION_NONE, subsize,
2365                           &pc_tree->split[i]->none, invalid_rd,
2366                           PICK_MODE_NONRD);
2367             split_rdc.rate += block_rdc.rate;
2368             split_rdc.dist += block_rdc.dist;
2369 
2370             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
2371                      subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2372           }
2373           restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2374           split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2375           split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2376         }
2377         if (none_rdc.rdcost < split_rdc.rdcost) {
2378           mib[0]->sb_type = bsize;
2379           pc_tree->partitioning = PARTITION_NONE;
2380           encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2381                    &pc_tree->none, NULL);
2382         } else {
2383           mib[0]->sb_type = subsize;
2384           pc_tree->partitioning = PARTITION_SPLIT;
2385           for (int i = 0; i < 4; i++) {
2386             int x_idx = (i & 1) * hbs;
2387             int y_idx = (i >> 1) * hbs;
2388             if ((mi_row + y_idx >= mi_params->mi_rows) ||
2389                 (mi_col + x_idx >= mi_params->mi_cols))
2390               continue;
2391 
2392             encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
2393                      subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2394           }
2395         }
2396       } else {
2397         for (int i = 0; i < 4; i++) {
2398           int x_idx = (i & 1) * hbs;
2399           int y_idx = (i >> 1) * hbs;
2400           int jj = i >> 1, ii = i & 0x01;
2401           if ((mi_row + y_idx >= mi_params->mi_rows) ||
2402               (mi_col + x_idx >= mi_params->mi_cols))
2403             continue;
2404           nonrd_use_partition(cpi, td, tile_data,
2405                               mib + jj * hbs * mi_params->mi_stride + ii * hbs,
2406                               tp, mi_row + y_idx, mi_col + x_idx, subsize,
2407                               pc_tree->split[i]);
2408         }
2409       }
2410       break;
2411     case PARTITION_VERT_A:
2412     case PARTITION_VERT_B:
2413     case PARTITION_HORZ_A:
2414     case PARTITION_HORZ_B:
2415     case PARTITION_HORZ_4:
2416     case PARTITION_VERT_4:
2417       assert(0 && "Cannot handle extended partition types");
2418     default: assert(0); break;
2419   }
2420 }
2421 
2422 #if !CONFIG_REALTIME_ONLY
2423 static const FIRSTPASS_STATS *read_one_frame_stats(const TWO_PASS *p, int frm) {
2424   assert(frm >= 0);
2425   if (frm < 0 ||
2426       p->stats_buf_ctx->stats_in_start + frm > p->stats_buf_ctx->stats_in_end) {
2427     return NULL;
2428   }
2429 
2430   return &p->stats_buf_ctx->stats_in_start[frm];
2431 }
2432 // Checks to see if a super block is on a horizontal image edge.
2433 // In most cases this is the "real" edge unless there are formatting
2434 // bars embedded in the stream.
2435 static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
2436   int top_edge = 0;
2437   int bottom_edge = cpi->common.mi_params.mi_rows;
2438   int is_active_h_edge = 0;
2439 
2440   // For two pass, account for any formatting bars detected.
2441   if (is_stat_consumption_stage_twopass(cpi)) {
2442     const AV1_COMMON *const cm = &cpi->common;
2443     const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
2444         &cpi->twopass, cm->current_frame.display_order_hint);
2445     if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
2446 
2447     // The inactive region is specified in MBs, not mi units.
2448     // The image edge is in the following MB row.
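    // A worked example: 2 inactive MB rows at the top mean
    // top_edge += 2 * 4 = 8 mi units, since one 16x16 MB spans four 4x4 mi
    // units vertically.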
2449     top_edge += (int)(this_frame_stats->inactive_zone_rows * 4);
2450 
2451     bottom_edge -= (int)(this_frame_stats->inactive_zone_rows * 4);
2452     bottom_edge = AOMMAX(top_edge, bottom_edge);
2453   }
2454 
2455   if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2456       ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2457     is_active_h_edge = 1;
2458   }
2459   return is_active_h_edge;
2460 }
2461 
2462 // Checks to see if a super block is on a vertical image edge.
2463 // In most cases this is the "real" edge unless there are formatting
2464 // bars embedded in the stream.
2465 static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
2466   int left_edge = 0;
2467   int right_edge = cpi->common.mi_params.mi_cols;
2468   int is_active_v_edge = 0;
2469 
2470   // For two pass, account for any formatting bars detected.
2471   if (is_stat_consumption_stage_twopass(cpi)) {
2472     const AV1_COMMON *const cm = &cpi->common;
2473     const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
2474         &cpi->twopass, cm->current_frame.display_order_hint);
2475     if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
2476 
2477     // The inactive region is specified in MBs, not mi units.
2478     // The image edge is in the following MB column.
2479     left_edge += (int)(this_frame_stats->inactive_zone_cols * 4);
2480 
2481     right_edge -= (int)(this_frame_stats->inactive_zone_cols * 4);
2482     right_edge = AOMMAX(left_edge, right_edge);
2483   }
2484 
2485   if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2486       ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2487     is_active_v_edge = 1;
2488   }
2489   return is_active_v_edge;
2490 }
2491 #endif  // !CONFIG_REALTIME_ONLY
2492 
2493 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2494   memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2495 }
2496 
2497 static INLINE void load_pred_mv(MACROBLOCK *x,
2498                                 const PICK_MODE_CONTEXT *const ctx) {
2499   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2500 }
2501 
2502 #if !CONFIG_REALTIME_ONLY
2503 // Try searching for an encoding for the given subblock. Returns zero if the
2504 // rdcost is already too high (to tell the caller not to bother searching for
2505 // encodings of further subblocks).
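// The budget handed to pick_sb_modes() is best_rdcost minus the cost already
// accumulated in *sum_rdc (via av1_rd_stats_subtraction below), so each later
// subblock searches under a progressively tighter bound.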
2506 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2507                            TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
2508                            int mi_row, int mi_col, BLOCK_SIZE subsize,
2509                            RD_STATS best_rdcost, RD_STATS *sum_rdc,
2510                            PARTITION_TYPE partition,
2511                            PICK_MODE_CONTEXT *prev_ctx,
2512                            PICK_MODE_CONTEXT *this_ctx) {
2513   MACROBLOCK *const x = &td->mb;
2514   const int orig_mult = x->rdmult;
2515   setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL);
2516 
2517   av1_rd_cost_update(x->rdmult, &best_rdcost);
2518   if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
2519 
2520   RD_STATS rdcost_remaining;
2521   av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining);
2522   RD_STATS this_rdc;
2523   pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, partition,
2524                 subsize, this_ctx, rdcost_remaining, PICK_MODE_RD);
2525 
2526   if (this_rdc.rate == INT_MAX) {
2527     sum_rdc->rdcost = INT64_MAX;
2528   } else {
2529     sum_rdc->rate += this_rdc.rate;
2530     sum_rdc->dist += this_rdc.dist;
2531     av1_rd_cost_update(x->rdmult, sum_rdc);
2532   }
2533 
2534   if (sum_rdc->rdcost >= best_rdcost.rdcost) {
2535     x->rdmult = orig_mult;
2536     return 0;
2537   }
2538 
2539   if (!is_last) {
2540     update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1);
2541     encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
2542   }
2543 
2544   x->rdmult = orig_mult;
2545   return 1;
2546 }
2547 
2548 static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
2549                                TileDataEnc *tile_data, TOKENEXTRA **tp,
2550                                PC_TREE *pc_tree, RD_STATS *best_rdc,
2551                                PICK_MODE_CONTEXT ctxs[3],
2552                                PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
2553                                BLOCK_SIZE bsize, PARTITION_TYPE partition,
2554                                int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
2555                                int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
2556                                int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
2557   const MACROBLOCK *const x = &td->mb;
2558   const MACROBLOCKD *const xd = &x->e_mbd;
2559   const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2560   RD_STATS sum_rdc;
2561   av1_init_rd_stats(&sum_rdc);
2562   sum_rdc.rate = x->partition_cost[pl][partition];
2563   sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
2564   if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
2565                        *best_rdc, &sum_rdc, partition, ctx, &ctxs[0]))
2566     return false;
2567 
2568   if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
2569                        *best_rdc, &sum_rdc, partition, &ctxs[0], &ctxs[1]))
2570     return false;
2571 
2572   if (!rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
2573                        *best_rdc, &sum_rdc, partition, &ctxs[1], &ctxs[2]))
2574     return false;
2575 
2576   av1_rd_cost_update(x->rdmult, &sum_rdc);
2577   if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2578   sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2579   if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2580 
2581   *best_rdc = sum_rdc;
2582   pc_tree->partitioning = partition;
2583   return true;
2584 }
2585 
2586 static AOM_INLINE void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
2587   pc_tree->partitioning = PARTITION_NONE;
2588   pc_tree->none.rd_stats.skip = 0;
2589 
2590   if (bsize >= BLOCK_8X8) {
2591     BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2592     for (int idx = 0; idx < 4; ++idx)
2593       reset_partition(pc_tree->split[idx], subsize);
2594   }
2595 }
2596 
2597 // Record the ref frames that have been selected by square partition blocks.
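// The mask is laid out as a 32x32 grid of 4x4 mi units, enough to cover a
// 128x128 superblock (hence the hardcoded stride of 32 below). For example,
// a 16x16 block (mi_size = 4) at offset (8, 12) within its superblock sets
// bit ref_type in rows 8..11 and columns 12..15 of the grid.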
2598 static AOM_INLINE void update_picked_ref_frames_mask(MACROBLOCK *const x,
2599                                                      int ref_type,
2600                                                      BLOCK_SIZE bsize,
2601                                                      int mib_size, int mi_row,
2602                                                      int mi_col) {
2603   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2604   const int sb_size_mask = mib_size - 1;
2605   const int mi_row_in_sb = mi_row & sb_size_mask;
2606   const int mi_col_in_sb = mi_col & sb_size_mask;
2607   const int mi_size = mi_size_wide[bsize];
2608   for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
2609     for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
2610       x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
2611     }
2612   }
2613 }
2614 
2615 // Structure to keep win flags for HORZ and VERT partition evaluations
2616 typedef struct {
2617   bool horz_win;
2618   bool vert_win;
2619 } RD_RECT_PART_WIN_INFO;
2620 
2621 // Decide whether to evaluate the AB partition derived from rect_part, based
2622 // on the results of the split and HORZ/VERT partition evaluations.
2623 int evaluate_ab_partition_based_on_split(
2624     PC_TREE *pc_tree, PARTITION_TYPE rect_part,
2625     RD_RECT_PART_WIN_INFO *rect_part_win_info, int qindex, int split_idx1,
2626     int split_idx2) {
2627   int num_win = 0;
2628   // Threshold for the number of winners.
2629   // Pruning is conservative at high quantizers.
2630   const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3);
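  // For example, with MAXQ = 255: qindex = 0 gives AOMMIN(3 * 2, 3) = 3, so
  // all three sub-partitions must have "won" for the AB partition to be
  // evaluated; qindex = 255 gives 0, so the AB partition is always evaluated.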
2631   bool sub_part_win = (rect_part_win_info == NULL)
2632                           ? (pc_tree->partitioning == rect_part)
2633                           : (rect_part == PARTITION_HORZ)
2634                                 ? rect_part_win_info->horz_win
2635                                 : rect_part_win_info->vert_win;
2636   num_win += (sub_part_win) ? 1 : 0;
2637   num_win +=
2638       (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0;
2639   num_win +=
2640       (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0;
2641   if (num_win < num_win_thresh) {
2642     return 0;
2643   }
2644   return 1;
2645 }
2646 
2647 // Searches for the best partition pattern for a block based on the
2648 // rate-distortion cost, and returns a bool value to indicate whether a valid
2649 // partition pattern is found. The partition can recursively go down to
2650 // the smallest block size.
2651 //
2652 // Inputs:
2653 //     cpi: the global compressor setting
2654 //     td: thread data
2655 //     tile_data: tile data
2656 //     tp: the pointer to the start token
2657 //     mi_row: row coordinate of the block in a step size of MI_SIZE
2658 //     mi_col: column coordinate of the block in a step size of MI_SIZE
2659 //     bsize: block size
2660 //     max_sq_part: the largest square block size for prediction blocks
2661 //     min_sq_part: the smallest square block size for prediction blocks
2662 //     rd_cost: the pointer to the final rd cost of the current block
2663 //     best_rdc: the upper bound of rd cost for a valid partition
2664 //     pc_tree: the pointer to the PC_TREE node storing the picked partitions
2665 //              and mode info for the current block
2666 //     none_rd: the pointer to the rd cost in the case of not splitting the
2667 //              current block
2668 //     multi_pass_mode: SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS
2669 //     rect_part_win_info: the pointer to a struct storing whether horz/vert
2670 //                         partition outperforms previously tested partitions
2671 //
2672 // Output:
2673 //     a bool value indicating whether a valid partition is found
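// The search below proceeds roughly as: PARTITION_NONE, then PARTITION_SPLIT
// (recursing into this function), then HORZ/VERT, then the AB and 4-way
// shapes, with each stage gated by speed features and by the best rd cost
// found so far.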
2674 static bool rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
2675                               TileDataEnc *tile_data, TOKENEXTRA **tp,
2676                               int mi_row, int mi_col, BLOCK_SIZE bsize,
2677                               BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
2678                               RD_STATS *rd_cost, RD_STATS best_rdc,
2679                               PC_TREE *pc_tree, int64_t *none_rd,
2680                               SB_MULTI_PASS_MODE multi_pass_mode,
2681                               RD_RECT_PART_WIN_INFO *rect_part_win_info) {
2682   const AV1_COMMON *const cm = &cpi->common;
2683   const CommonModeInfoParams *const mi_params = &cm->mi_params;
2684   const int num_planes = av1_num_planes(cm);
2685   TileInfo *const tile_info = &tile_data->tile_info;
2686   MACROBLOCK *const x = &td->mb;
2687   MACROBLOCKD *const xd = &x->e_mbd;
2688   const int mi_step = mi_size_wide[bsize] / 2;
2689   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2690   const TOKENEXTRA *const tp_orig = *tp;
2691   PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
2692   int tmp_partition_cost[PARTITION_TYPES];
2693   BLOCK_SIZE subsize;
2694   RD_STATS this_rdc, sum_rdc;
2695   const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
2696   int do_square_split = bsize_at_least_8x8;
2697   const int pl = bsize_at_least_8x8
2698                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
2699                      : 0;
2700   const int *partition_cost = x->partition_cost[pl];
2701 
2702   int do_rectangular_split = cpi->oxcf.enable_rect_partitions;
2703   int64_t cur_none_rd = 0;
2704   int64_t split_rd[4] = { 0, 0, 0, 0 };
2705   int64_t horz_rd[2] = { 0, 0 };
2706   int64_t vert_rd[2] = { 0, 0 };
2707   int prune_horz = 0;
2708   int prune_vert = 0;
2709   int terminate_partition_search = 0;
2710 
2711   int split_ctx_is_ready[2] = { 0, 0 };
2712   int horz_ctx_is_ready = 0;
2713   int vert_ctx_is_ready = 0;
2714   BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
2715   // Initialise HORZ and VERT win flags as true for all split partitions
2716   RD_RECT_PART_WIN_INFO split_part_rect_win[4] = {
2717     { true, true }, { true, true }, { true, true }, { true, true }
2718   };
2719 
2720   bool found_best_partition = false;
2721   if (best_rdc.rdcost < 0) {
2722     av1_invalid_rd_stats(rd_cost);
2723     return found_best_partition;
2724   }
2725 
2726   if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
2727     x->quad_tree_idx = 0;
2728     x->cnn_output_valid = 0;
2729   }
2730 
2731   if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
2732 
2733   // Override skipping rectangular partition operations for edge blocks
2734   const int has_rows = (mi_row + mi_step < mi_params->mi_rows);
2735   const int has_cols = (mi_col + mi_step < mi_params->mi_cols);
2736   const int xss = x->e_mbd.plane[1].subsampling_x;
2737   const int yss = x->e_mbd.plane[1].subsampling_y;
2738 
2739   if (none_rd) *none_rd = 0;
2740   int partition_none_allowed = has_rows && has_cols;
2741   int partition_horz_allowed =
2742       has_cols && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2743       get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), xss,
2744                            yss) != BLOCK_INVALID;
2745   int partition_vert_allowed =
2746       has_rows && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2747       get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), xss,
2748                            yss) != BLOCK_INVALID;
2749 
2750   (void)*tp_orig;
2751 
2752 #if CONFIG_COLLECT_PARTITION_STATS
2753   int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
2754   int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
2755   int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
2756   struct aom_usec_timer partition_timer = { 0 };
2757   int partition_timer_on = 0;
2758 #if CONFIG_COLLECT_PARTITION_STATS == 2
2759   PartitionStats *part_stats = &cpi->partition_stats;
2760 #endif
2761 #endif
2762 
2763   // Override partition costs at the edges of the frame in the same
2764   // way as in read_partition (see decodeframe.c)
2765   if (!(has_rows && has_cols)) {
2766     assert(bsize_at_least_8x8 && pl >= 0);
2767     const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
2768     const int max_cost = av1_cost_symbol(0);
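    // av1_cost_symbol(0) is the cost of a symbol with (near-)zero
    // probability, i.e. the maximum token cost; disallowed partition types
    // are priced at this maximum below.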
2769     for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = max_cost;
2770     if (has_cols) {
2771       // At the bottom, the two possibilities are HORZ and SPLIT
2772       aom_cdf_prob bot_cdf[2];
2773       partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
2774       static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
2775       av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
2776     } else if (has_rows) {
2777       // At the right, the two possibilities are VERT and SPLIT
2778       aom_cdf_prob rhs_cdf[2];
2779       partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
2780       static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
2781       av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
2782     } else {
2783       // At the bottom right, we always split
2784       tmp_partition_cost[PARTITION_SPLIT] = 0;
2785     }
2786 
2787     partition_cost = tmp_partition_cost;
2788   }
2789 
2790 #ifndef NDEBUG
2791   // Nothing should rely on the default value of this array (it is just
2792   // leftover from encoding the previous block). Set it to a fixed pattern
2793   // when debugging.
2794   // Bits 0, 1, 2 are the blk_skip flags of each plane;
2795   // bits 4, 5, 6 are initialization checks for each plane.
2796   memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
2797 #endif  // NDEBUG
2798 
2799   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2800 
2801   av1_init_rd_stats(&this_rdc);
2802 
2803   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2804 
2805   // Save rdmult before it might be changed, so it can be restored later.
2806   const int orig_rdmult = x->rdmult;
2807   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
2808 
2809   av1_rd_cost_update(x->rdmult, &best_rdc);
2810 
2811   if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
2812     x->mb_energy = av1_log_block_var(cpi, x, bsize);
2813 
2814   if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) {
2815     partition_horz_allowed &= !has_rows;
2816     partition_vert_allowed &= !has_cols;
2817   }
2818 
2819   xd->above_txfm_context =
2820       cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2821   xd->left_txfm_context =
2822       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2823   save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2824 
2825   const int try_intra_cnn_split =
2826       !cpi->is_screen_content_type && frame_is_intra_only(cm) &&
2827       cpi->sf.part_sf.intra_cnn_split &&
2828       cm->seq_params.sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
2829       bsize >= BLOCK_8X8 &&
2830       mi_row + mi_size_high[bsize] <= mi_params->mi_rows &&
2831       mi_col + mi_size_wide[bsize] <= mi_params->mi_cols;
2832 
2833   if (try_intra_cnn_split) {
2834     av1_intra_mode_cnn_partition(
2835         &cpi->common, x, bsize, x->quad_tree_idx, &partition_none_allowed,
2836         &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
2837         &do_square_split);
2838   }
2839 
2840   // Use simple_motion_search to prune partitions. This must be done prior to
2841   // PARTITION_SPLIT to propagate the initial mvs to a smaller blocksize.
2842   const int try_split_only =
2843       !cpi->is_screen_content_type &&
2844       cpi->sf.part_sf.simple_motion_search_split && do_square_split &&
2845       bsize >= BLOCK_8X8 &&
2846       mi_row + mi_size_high[bsize] <= mi_params->mi_rows &&
2847       mi_col + mi_size_wide[bsize] <= mi_params->mi_cols &&
2848       !frame_is_intra_only(cm) && !av1_superres_scaled(cm);
2849 
2850   if (try_split_only) {
2851     av1_simple_motion_search_based_split(
2852         cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
2853         &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
2854         &do_square_split);
2855   }
2856 
2857   const int try_prune_rect =
2858       !cpi->is_screen_content_type &&
2859       cpi->sf.part_sf.simple_motion_search_prune_rect &&
2860       !frame_is_intra_only(cm) && do_rectangular_split &&
2861       (do_square_split || partition_none_allowed ||
2862        (prune_horz && prune_vert)) &&
2863       (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
2864 
2865   if (try_prune_rect) {
2866     av1_simple_motion_search_prune_rect(
2867         cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_horz_allowed,
2868         &partition_vert_allowed, &prune_horz, &prune_vert);
2869   }
2870 
2871   // Max and min square partition levels are defined as the partition nodes that
2872   // the recursive function rd_pick_partition() can reach. To implement this:
2873   // only PARTITION_NONE is allowed if the current node equals min_sq_part, and
2874   // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part.
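  // For example, with max_sq_part = BLOCK_64X64 a BLOCK_128X128 node is
  // is_gt_max_sq_part and may only split; with min_sq_part = BLOCK_16X16 a
  // BLOCK_16X16 node is is_le_min_sq_part and (away from the frame edge) may
  // only use PARTITION_NONE.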
2875   assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]);
2876   assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]);
2877   assert(min_sq_part <= max_sq_part);
2878   assert(block_size_wide[bsize] == block_size_high[bsize]);
2879   const int max_partition_size = block_size_wide[max_sq_part];
2880   const int min_partition_size = block_size_wide[min_sq_part];
2881   const int blksize = block_size_wide[bsize];
2882   assert(min_partition_size <= max_partition_size);
2883   const int is_le_min_sq_part = blksize <= min_partition_size;
2884   const int is_gt_max_sq_part = blksize > max_partition_size;
2885   if (is_gt_max_sq_part) {
2886     // If current block size is larger than max, only allow split.
2887     partition_none_allowed = 0;
2888     partition_horz_allowed = 0;
2889     partition_vert_allowed = 0;
2890     do_square_split = 1;
2891   } else if (is_le_min_sq_part) {
2892     // If current block size is less than or equal to min, only allow none if
2893     // the block is large enough to be valid; otherwise only allow split.
2894     partition_horz_allowed = 0;
2895     partition_vert_allowed = 0;
2896     // Only disable square split when the current block is not at the picture
2897     // boundary; otherwise, inherit the square split flag from the logic above.
2898     if (has_rows && has_cols) do_square_split = 0;
2899     partition_none_allowed = !do_square_split;
2900   }
2901 
2902 BEGIN_PARTITION_SEARCH:
2903   if (x->must_find_valid_partition) {
2904     do_square_split = bsize_at_least_8x8 && (blksize > min_partition_size);
2905     partition_none_allowed =
2906         has_rows && has_cols && (blksize >= min_partition_size);
2907     partition_horz_allowed =
2908         has_cols && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2909         (blksize > min_partition_size) &&
2910         get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), xss,
2911                              yss) != BLOCK_INVALID;
2912     partition_vert_allowed =
2913         has_rows && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2914         (blksize > min_partition_size) &&
2915         get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), xss,
2916                              yss) != BLOCK_INVALID;
2917     terminate_partition_search = 0;
2918   }
2919 
2920   // Partition block source pixel variance.
2921   unsigned int pb_source_variance = UINT_MAX;
2922 
2923   // Partition block SSE after simple motion compensation. Not in use now,
2924   // but will be used for upcoming speed features.
2925   unsigned int pb_simple_motion_pred_sse = UINT_MAX;
2926   (void)pb_simple_motion_pred_sse;
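  // The (void) cast only suppresses the unused-variable warning until those
  // speed features land.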
2927 
2928   // PARTITION_NONE
2929   if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
2930   assert(terminate_partition_search == 0);
2931   int64_t part_none_rd = INT64_MAX;
2932   if (cpi->is_screen_content_type)
2933     partition_none_allowed = has_rows && has_cols;
2934   if (partition_none_allowed && !is_gt_max_sq_part) {
2935     int pt_cost = 0;
2936     if (bsize_at_least_8x8) {
2937       pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
2938                     ? partition_cost[PARTITION_NONE]
2939                     : 0;
2940     }
2941     RD_STATS partition_rdcost;
2942     av1_init_rd_stats(&partition_rdcost);
2943     partition_rdcost.rate = pt_cost;
2944     av1_rd_cost_update(x->rdmult, &partition_rdcost);
2945     RD_STATS best_remain_rdcost;
2946     av1_rd_stats_subtraction(x->rdmult, &best_rdc, &partition_rdcost,
2947                              &best_remain_rdcost);
2948 #if CONFIG_COLLECT_PARTITION_STATS
2949     if (best_remain_rdcost.rdcost >= 0) {
2950       partition_attempts[PARTITION_NONE] += 1;
2951       aom_usec_timer_start(&partition_timer);
2952       partition_timer_on = 1;
2953     }
2954 #endif
2955     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
2956                   bsize, ctx_none, best_remain_rdcost, PICK_MODE_RD);
2957     av1_rd_cost_update(x->rdmult, &this_rdc);
2958 #if CONFIG_COLLECT_PARTITION_STATS
2959     if (partition_timer_on) {
2960       aom_usec_timer_mark(&partition_timer);
2961       int64_t time = aom_usec_timer_elapsed(&partition_timer);
2962       partition_times[PARTITION_NONE] += time;
2963       partition_timer_on = 0;
2964     }
2965 #endif
2966     pb_source_variance = x->source_variance;
2967     pb_simple_motion_pred_sse = x->simple_motion_pred_sse;
2968     if (none_rd) *none_rd = this_rdc.rdcost;
2969     cur_none_rd = this_rdc.rdcost;
2970     if (this_rdc.rate != INT_MAX) {
2971       if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) {
2972         const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
2973         update_picked_ref_frames_mask(x, ref_type, bsize,
2974                                       cm->seq_params.mib_size, mi_row, mi_col);
2975       }
2976       if (bsize_at_least_8x8) {
2977         this_rdc.rate += pt_cost;
2978         this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2979       }
2980 
2981       part_none_rd = this_rdc.rdcost;
2982       if (this_rdc.rdcost < best_rdc.rdcost) {
2983         // Adjust dist breakout threshold according to the partition size.
2984         const int64_t dist_breakout_thr =
2985             cpi->sf.part_sf.partition_search_breakout_dist_thr >>
2986             ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
2987              (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
2988         const int rate_breakout_thr =
2989             cpi->sf.part_sf.partition_search_breakout_rate_thr *
2990             num_pels_log2_lookup[bsize];
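        // With MAX_SB_SIZE_LOG2 = 7 the dist threshold is shifted right by
        // 10 - (mi_size_wide_log2 + mi_size_high_log2): 2 for a 64x64 block,
        // 8 for an 8x8 block, so smaller partitions must show proportionally
        // smaller distortion to trigger the breakout.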
2991 
2992         best_rdc = this_rdc;
2993         found_best_partition = true;
2994         if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
2995 
2996         if (!frame_is_intra_only(cm) &&
2997             (do_square_split || do_rectangular_split) &&
2998             !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
2999           const int use_ml_based_breakout =
3000               bsize <= cpi->sf.part_sf.use_square_partition_only_threshold &&
3001               bsize > BLOCK_4X4 && xd->bd == 8;
3002           if (use_ml_based_breakout) {
3003             if (av1_ml_predict_breakout(cpi, bsize, x, &this_rdc,
3004                                         pb_source_variance)) {
3005               do_square_split = 0;
3006               do_rectangular_split = 0;
3007             }
3008           }
3009 
3010           // If all y, u, v transform blocks in this partition are skippable,
3011           // and the dist & rate are within the thresholds, the partition
3012           // search is terminated for current branch of the partition search
3013           // tree. The dist & rate thresholds are set to 0 at speed 0 to
3014           // disable the early termination at that speed.
3015           if (best_rdc.dist < dist_breakout_thr &&
3016               best_rdc.rate < rate_breakout_thr) {
3017             do_square_split = 0;
3018             do_rectangular_split = 0;
3019           }
3020         }
3021 
3022         if (cpi->sf.part_sf.simple_motion_search_early_term_none &&
3023             cm->show_frame && !frame_is_intra_only(cm) &&
3024             bsize >= BLOCK_16X16 && mi_row + mi_step < mi_params->mi_rows &&
3025             mi_col + mi_step < mi_params->mi_cols &&
3026             this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
3027             this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
3028             (do_square_split || do_rectangular_split)) {
3029           av1_simple_motion_search_early_term_none(cpi, x, pc_tree, mi_row,
3030                                                    mi_col, bsize, &this_rdc,
3031                                                    &terminate_partition_search);
3032         }
3033       }
3034     }
3035 
3036     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3037   }
3038 
3039   // store estimated motion vector
3040   if (cpi->sf.mv_sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
3041 
3042   // PARTITION_SPLIT
3043   int64_t part_split_rd = INT64_MAX;
3044   if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
3045     av1_init_rd_stats(&sum_rdc);
3046     subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3047     sum_rdc.rate = partition_cost[PARTITION_SPLIT];
3048     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3049 
3050     int idx;
3051 #if CONFIG_COLLECT_PARTITION_STATS
3052     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3053       partition_attempts[PARTITION_SPLIT] += 1;
3054       aom_usec_timer_start(&partition_timer);
3055       partition_timer_on = 1;
3056     }
3057 #endif
3058     for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
3059       const int x_idx = (idx & 1) * mi_step;
3060       const int y_idx = (idx >> 1) * mi_step;
3061 
3062       if (mi_row + y_idx >= mi_params->mi_rows ||
3063           mi_col + x_idx >= mi_params->mi_cols)
3064         continue;
3065 
3066       if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3067 
3068       pc_tree->split[idx]->index = idx;
3069       int64_t *p_split_rd = &split_rd[idx];
3070 
3071       RD_STATS best_remain_rdcost;
3072       av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3073                                &best_remain_rdcost);
3074 
3075       int curr_quad_tree_idx = 0;
3076       if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3077         curr_quad_tree_idx = x->quad_tree_idx;
3078         x->quad_tree_idx = 4 * curr_quad_tree_idx + idx + 1;
3079       }
3080       if (!rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3081                              mi_col + x_idx, subsize, max_sq_part, min_sq_part,
3082                              &this_rdc, best_remain_rdcost, pc_tree->split[idx],
3083                              p_split_rd, multi_pass_mode,
3084                              &split_part_rect_win[idx])) {
3085         av1_invalid_rd_stats(&sum_rdc);
3086         break;
3087       }
3088       if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3089         x->quad_tree_idx = curr_quad_tree_idx;
3090       }
3091 
3092       sum_rdc.rate += this_rdc.rate;
3093       sum_rdc.dist += this_rdc.dist;
3094       av1_rd_cost_update(x->rdmult, &sum_rdc);
3095       if (idx <= 1 && (bsize <= BLOCK_8X8 ||
3096                        pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
3097         const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
3098         const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3099         // Neither palette mode nor cfl predicted
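        // (those modes depend on previously reconstructed context, so the
        // saved mode context cannot simply be reused by the AB partitions
        // evaluated later)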
3100         if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3101           if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
3102         }
3103       }
3104     }
3105 #if CONFIG_COLLECT_PARTITION_STATS
3106     if (partition_timer_on) {
3107       aom_usec_timer_mark(&partition_timer);
3108       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3109       partition_times[PARTITION_SPLIT] += time;
3110       partition_timer_on = 0;
3111     }
3112 #endif
3113     const int reached_last_index = (idx == 4);
3114 
3115     part_split_rd = sum_rdc.rdcost;
3116     if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
3117       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3118       if (sum_rdc.rdcost < best_rdc.rdcost) {
3119         best_rdc = sum_rdc;
3120         found_best_partition = true;
3121         pc_tree->partitioning = PARTITION_SPLIT;
3122       }
3123     } else if (cpi->sf.part_sf.less_rectangular_check_level > 0) {
3124       // Skip rectangular partition test when partition type none gives better
3125       // rd than partition type split.
3126       if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) {
3127         const int partition_none_valid = cur_none_rd > 0;
3128         const int partition_none_better = cur_none_rd < sum_rdc.rdcost;
3129         do_rectangular_split &=
3130             !(partition_none_valid && partition_none_better);
3131       }
3132     }
3133 
3134     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3135   }  // if (do_split)
3136 
3137   if (cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3138       !frame_is_intra_only(cm) && !terminate_partition_search &&
3139       do_rectangular_split &&
3140       (partition_horz_allowed || partition_vert_allowed)) {
3141     av1_ml_early_term_after_split(cpi, x, pc_tree, bsize, best_rdc.rdcost,
3142                                   part_none_rd, part_split_rd, split_rd, mi_row,
3143                                   mi_col, &terminate_partition_search);
3144   }
3145 
3146   if (!cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3147       cpi->sf.part_sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
3148       (partition_horz_allowed || partition_vert_allowed) &&
3149       !(prune_horz || prune_vert) && !terminate_partition_search) {
3150     av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3151     av1_ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
3152                                 split_rd, &prune_horz, &prune_vert);
3153   }
3154 
3155   // PARTITION_HORZ
3156   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed));
3157   if (!terminate_partition_search && partition_horz_allowed && !prune_horz &&
3158       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3159       !is_gt_max_sq_part) {
3160     av1_init_rd_stats(&sum_rdc);
3161     subsize = get_partition_subsize(bsize, PARTITION_HORZ);
3162     if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3163     sum_rdc.rate = partition_cost[PARTITION_HORZ];
3164     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3165     RD_STATS best_remain_rdcost;
3166     av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3167                              &best_remain_rdcost);
3168 #if CONFIG_COLLECT_PARTITION_STATS
3169     if (best_remain_rdcost.rdcost >= 0) {
3170       partition_attempts[PARTITION_HORZ] += 1;
3171       aom_usec_timer_start(&partition_timer);
3172       partition_timer_on = 1;
3173     }
3174 #endif
3175     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
3176                   subsize, &pc_tree->horizontal[0], best_remain_rdcost,
3177                   PICK_MODE_RD);
3178     av1_rd_cost_update(x->rdmult, &this_rdc);
3179 
3180     if (this_rdc.rate == INT_MAX) {
3181       sum_rdc.rdcost = INT64_MAX;
3182     } else {
3183       sum_rdc.rate += this_rdc.rate;
3184       sum_rdc.dist += this_rdc.dist;
3185       av1_rd_cost_update(x->rdmult, &sum_rdc);
3186     }
3187     horz_rd[0] = this_rdc.rdcost;
3188 
3189     if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
3190       const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
3191       const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
3192       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3193       // Neither palette mode nor cfl predicted
3194       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3195         if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
3196       }
3197       update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
3198       encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
3199 
3200       if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
3201 
3202       av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3203                                &best_remain_rdcost);
3204 
3205       pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
3206                     PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
3207                     best_remain_rdcost, PICK_MODE_RD);
3208       av1_rd_cost_update(x->rdmult, &this_rdc);
3209       horz_rd[1] = this_rdc.rdcost;
3210 
3211       if (this_rdc.rate == INT_MAX) {
3212         sum_rdc.rdcost = INT64_MAX;
3213       } else {
3214         sum_rdc.rate += this_rdc.rate;
3215         sum_rdc.dist += this_rdc.dist;
3216         av1_rd_cost_update(x->rdmult, &sum_rdc);
3217       }
3218     }
3219 #if CONFIG_COLLECT_PARTITION_STATS
3220     if (partition_timer_on) {
3221       aom_usec_timer_mark(&partition_timer);
3222       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3223       partition_times[PARTITION_HORZ] += time;
3224       partition_timer_on = 0;
3225     }
3226 #endif
3227 
3228     if (sum_rdc.rdcost < best_rdc.rdcost) {
3229       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3230       if (sum_rdc.rdcost < best_rdc.rdcost) {
3231         best_rdc = sum_rdc;
3232         found_best_partition = true;
3233         pc_tree->partitioning = PARTITION_HORZ;
3234       }
3235     } else {
3236       // Update HORZ win flag
3237       if (rect_part_win_info != NULL) {
3238         rect_part_win_info->horz_win = false;
3239       }
3240     }
3241 
3242     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3243   }
3244 
3245   // PARTITION_VERT
3246   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed));
3247   if (!terminate_partition_search && partition_vert_allowed && !prune_vert &&
3248       (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
3249       !is_gt_max_sq_part) {
3250     av1_init_rd_stats(&sum_rdc);
3251     subsize = get_partition_subsize(bsize, PARTITION_VERT);
3252 
3253     if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3254 
3255     sum_rdc.rate = partition_cost[PARTITION_VERT];
3256     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3257     RD_STATS best_remain_rdcost;
3258     av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3259                              &best_remain_rdcost);
3260 #if CONFIG_COLLECT_PARTITION_STATS
3261     if (best_remain_rdcost.rdcost >= 0) {
3262       partition_attempts[PARTITION_VERT] += 1;
3263       aom_usec_timer_start(&partition_timer);
3264       partition_timer_on = 1;
3265     }
3266 #endif
3267     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
3268                   subsize, &pc_tree->vertical[0], best_remain_rdcost,
3269                   PICK_MODE_RD);
3270     av1_rd_cost_update(x->rdmult, &this_rdc);
3271 
3272     if (this_rdc.rate == INT_MAX) {
3273       sum_rdc.rdcost = INT64_MAX;
3274     } else {
3275       sum_rdc.rate += this_rdc.rate;
3276       sum_rdc.dist += this_rdc.dist;
3277       av1_rd_cost_update(x->rdmult, &sum_rdc);
3278     }
3279     vert_rd[0] = this_rdc.rdcost;
3280     if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) {
3281       const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
3282       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3283       // Neither palette mode nor cfl predicted
3284       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3285         if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
3286       }
3287       update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
3288       encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
3289 
3290       if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3291 
3292       av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3293                                &best_remain_rdcost);
3294       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
3295                     PARTITION_VERT, subsize, &pc_tree->vertical[1],
3296                     best_remain_rdcost, PICK_MODE_RD);
3297       av1_rd_cost_update(x->rdmult, &this_rdc);
3298       vert_rd[1] = this_rdc.rdcost;
3299 
3300       if (this_rdc.rate == INT_MAX) {
3301         sum_rdc.rdcost = INT64_MAX;
3302       } else {
3303         sum_rdc.rate += this_rdc.rate;
3304         sum_rdc.dist += this_rdc.dist;
3305         av1_rd_cost_update(x->rdmult, &sum_rdc);
3306       }
3307     }
3308 #if CONFIG_COLLECT_PARTITION_STATS
3309     if (partition_timer_on) {
3310       aom_usec_timer_mark(&partition_timer);
3311       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3312       partition_times[PARTITION_VERT] += time;
3313       partition_timer_on = 0;
3314     }
3315 #endif
3316 
3317     av1_rd_cost_update(x->rdmult, &sum_rdc);
3318     if (sum_rdc.rdcost < best_rdc.rdcost) {
3319       best_rdc = sum_rdc;
3320       found_best_partition = true;
3321       pc_tree->partitioning = PARTITION_VERT;
3322     } else {
3323       // Update VERT win flag
3324       if (rect_part_win_info != NULL) {
3325         rect_part_win_info->vert_win = false;
3326       }
3327     }
3328 
3329     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3330   }
3331 
3332   if (pb_source_variance == UINT_MAX) {
3333     av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3334     if (is_cur_buf_hbd(xd)) {
3335       pb_source_variance = av1_high_get_sby_perpixel_variance(
3336           cpi, &x->plane[0].src, bsize, xd->bd);
3337     } else {
3338       pb_source_variance =
3339           av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3340     }
3341   }
3342 
3343   if (use_pb_simple_motion_pred_sse(cpi) &&
3344       pb_simple_motion_pred_sse == UINT_MAX) {
3345     const FULLPEL_MV start_mv = kZeroFullMv;
3346     unsigned int var = 0;
3347 
3348     av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, start_mv, 0,
3349                               &pb_simple_motion_pred_sse, &var);
3350   }
3351 
3352   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !do_rectangular_split));
3353 
3354   const int ext_partition_allowed =
3355       do_rectangular_split &&
3356       bsize > cpi->sf.part_sf.ext_partition_eval_thresh && has_rows && has_cols;
3357 
3358   // The standard AB partitions are allowed whenever ext-partition-types are
3359   // allowed.
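  // (HORZ_A: two square blocks on top, one wide block below; HORZ_B is the
  // mirror image; VERT_A/VERT_B are the transposed versions, as the
  // rd_test_partition3() calls below spell out.)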
3360   int horzab_partition_allowed =
3361       ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3362   int vertab_partition_allowed =
3363       ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3364 
3365   if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3366     if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 1) {
3367       // TODO(debargha,huisu@google.com): may need to tune the threshold for
3368       // pb_source_variance.
3369       horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3370                                    (pc_tree->partitioning == PARTITION_NONE &&
3371                                     pb_source_variance < 32) ||
3372                                    pc_tree->partitioning == PARTITION_SPLIT);
3373       vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3374                                    (pc_tree->partitioning == PARTITION_NONE &&
3375                                     pb_source_variance < 32) ||
3376                                    pc_tree->partitioning == PARTITION_SPLIT);
3377     } else {
3378       horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3379                                    pc_tree->partitioning == PARTITION_SPLIT);
3380       vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3381                                    pc_tree->partitioning == PARTITION_SPLIT);
3382     }
3383     horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
3384     horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
3385     vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
3386     vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
3387     split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
3388     split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
3389     split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
3390     split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
3391   }
3392   int horza_partition_allowed = horzab_partition_allowed;
3393   int horzb_partition_allowed = horzab_partition_allowed;
3394   if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3395     const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
3396     const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
3397     switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3398       case 1:
3399         horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
3400         horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
3401         break;
3402       case 2:
3403       default:
3404         horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
3405         horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
3406         break;
3407     }
3408   }
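  // Scaling by 14/16 (level 1) or 15/16 (level 2) gives the projected cost
  // some slack: the AB shape is pruned only when the sum of its measured
  // sub-block costs exceeds best_rdc by roughly 14% or 7%, respectively.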
3409 
3410   int verta_partition_allowed = vertab_partition_allowed;
3411   int vertb_partition_allowed = vertab_partition_allowed;
3412   if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3413     const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
3414     const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
3415     switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3416       case 1:
3417         verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
3418         vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
3419         break;
3420       case 2:
3421       default:
3422         verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
3423         vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
3424         break;
3425     }
3426   }
3427 
3428   if (cpi->sf.part_sf.ml_prune_ab_partition && ext_partition_allowed &&
3429       partition_horz_allowed && partition_vert_allowed) {
3430     // TODO(huisu@google.com): x->source_variance may not be the current
3431     // block's variance. The correct one to use is pb_source_variance. Need to
3432     // re-train the model to fix it.
3433     av1_ml_prune_ab_partition(
3434         bsize, pc_tree->partitioning, get_unsigned_bits(x->source_variance),
3435         best_rdc.rdcost, horz_rd, vert_rd, split_rd, &horza_partition_allowed,
3436         &horzb_partition_allowed, &verta_partition_allowed,
3437         &vertb_partition_allowed);
3438   }
3439 
3440   horza_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3441   horzb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3442   verta_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3443   vertb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3444 
3445   if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3446       horza_partition_allowed) {
3447     horza_partition_allowed &= evaluate_ab_partition_based_on_split(
3448         pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 0, 1);
3449   }
3450 
3451   // PARTITION_HORZ_A
3452   if (!terminate_partition_search && partition_horz_allowed &&
3453       horza_partition_allowed && !is_gt_max_sq_part) {
3454     subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
3455     pc_tree->horizontala[0].rd_mode_is_ready = 0;
3456     pc_tree->horizontala[1].rd_mode_is_ready = 0;
3457     pc_tree->horizontala[2].rd_mode_is_ready = 0;
3458     if (split_ctx_is_ready[0]) {
3459       av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
3460       pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
3461       pc_tree->horizontala[0].rd_mode_is_ready = 1;
3462       if (split_ctx_is_ready[1]) {
3463         av1_copy_tree_context(&pc_tree->horizontala[1],
3464                               &pc_tree->split[1]->none);
3465         pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
3466         pc_tree->horizontala[1].rd_mode_is_ready = 1;
3467       }
3468     }
3469 #if CONFIG_COLLECT_PARTITION_STATS
3470     {
3471       RD_STATS tmp_sum_rdc;
3472       av1_init_rd_stats(&tmp_sum_rdc);
3473       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
3474       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3475       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3476         partition_attempts[PARTITION_HORZ_A] += 1;
3477         aom_usec_timer_start(&partition_timer);
3478         partition_timer_on = 1;
3479       }
3480     }
3481 #endif
3482     found_best_partition |= rd_test_partition3(
3483         cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontala,
3484         ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_A, mi_row, mi_col,
3485         bsize2, mi_row, mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
3486         subsize);
3487 #if CONFIG_COLLECT_PARTITION_STATS
3488     if (partition_timer_on) {
3489       aom_usec_timer_mark(&partition_timer);
3490       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3491       partition_times[PARTITION_HORZ_A] += time;
3492       partition_timer_on = 0;
3493     }
3494 #endif
3495     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3496   }
3497 
3498   if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3499       horzb_partition_allowed) {
3500     horzb_partition_allowed &= evaluate_ab_partition_based_on_split(
3501         pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 2, 3);
3502   }
3503 
3504   // PARTITION_HORZ_B
3505   if (!terminate_partition_search && partition_horz_allowed &&
3506       horzb_partition_allowed && !is_gt_max_sq_part) {
3507     subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
3508     pc_tree->horizontalb[0].rd_mode_is_ready = 0;
3509     pc_tree->horizontalb[1].rd_mode_is_ready = 0;
3510     pc_tree->horizontalb[2].rd_mode_is_ready = 0;
3511     if (horz_ctx_is_ready) {
3512       av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
3513       pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
3514       pc_tree->horizontalb[0].rd_mode_is_ready = 1;
3515     }
3516 #if CONFIG_COLLECT_PARTITION_STATS
3517     {
3518       RD_STATS tmp_sum_rdc;
3519       av1_init_rd_stats(&tmp_sum_rdc);
3520       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
3521       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3522       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3523         partition_attempts[PARTITION_HORZ_B] += 1;
3524         aom_usec_timer_start(&partition_timer);
3525         partition_timer_on = 1;
3526       }
3527     }
3528 #endif
3529     found_best_partition |= rd_test_partition3(
3530         cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontalb,
3531         ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_B, mi_row, mi_col,
3532         subsize, mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
3533         mi_col + mi_step, bsize2);
3534 
3535 #if CONFIG_COLLECT_PARTITION_STATS
3536     if (partition_timer_on) {
3537       aom_usec_timer_mark(&partition_timer);
3538       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3539       partition_times[PARTITION_HORZ_B] += time;
3540       partition_timer_on = 0;
3541     }
3542 #endif
3543     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3544   }
3545 
3546   if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3547       verta_partition_allowed) {
3548     verta_partition_allowed &= evaluate_ab_partition_based_on_split(
3549         pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 0, 2);
3550   }
3551 
3552   // PARTITION_VERT_A
3553   if (!terminate_partition_search && partition_vert_allowed &&
3554       verta_partition_allowed && !is_gt_max_sq_part) {
3555     subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
3556     pc_tree->verticala[0].rd_mode_is_ready = 0;
3557     pc_tree->verticala[1].rd_mode_is_ready = 0;
3558     pc_tree->verticala[2].rd_mode_is_ready = 0;
3559     if (split_ctx_is_ready[0]) {
3560       av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
3561       pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
3562       pc_tree->verticala[0].rd_mode_is_ready = 1;
3563     }
3564 #if CONFIG_COLLECT_PARTITION_STATS
3565     {
3566       RD_STATS tmp_sum_rdc;
3567       av1_init_rd_stats(&tmp_sum_rdc);
3568       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
3569       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3570       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3571         partition_attempts[PARTITION_VERT_A] += 1;
3572         aom_usec_timer_start(&partition_timer);
3573         partition_timer_on = 1;
3574       }
3575     }
3576 #endif
3577     found_best_partition |= rd_test_partition3(
3578         cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticala,
3579         ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_A, mi_row, mi_col,
3580         bsize2, mi_row + mi_step, mi_col, bsize2, mi_row, mi_col + mi_step,
3581         subsize);
3582 #if CONFIG_COLLECT_PARTITION_STATS
3583     if (partition_timer_on) {
3584       aom_usec_timer_mark(&partition_timer);
3585       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3586       partition_times[PARTITION_VERT_A] += time;
3587       partition_timer_on = 0;
3588     }
3589 #endif
3590     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3591   }
3592 
3593   if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3594       vertb_partition_allowed) {
3595     vertb_partition_allowed &= evaluate_ab_partition_based_on_split(
3596         pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3);
3597   }
3598 
3599   // PARTITION_VERT_B
3600   if (!terminate_partition_search && partition_vert_allowed &&
3601       vertb_partition_allowed && !is_gt_max_sq_part) {
3602     subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
3603     pc_tree->verticalb[0].rd_mode_is_ready = 0;
3604     pc_tree->verticalb[1].rd_mode_is_ready = 0;
3605     pc_tree->verticalb[2].rd_mode_is_ready = 0;
3606     if (vert_ctx_is_ready) {
3607       av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
3608       pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
3609       pc_tree->verticalb[0].rd_mode_is_ready = 1;
3610     }
3611 #if CONFIG_COLLECT_PARTITION_STATS
3612     {
3613       RD_STATS tmp_sum_rdc;
3614       av1_init_rd_stats(&tmp_sum_rdc);
3615       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
3616       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3617       if (!frame_is_intra_only(cm) &&
3618           best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3619         partition_attempts[PARTITION_VERT_B] += 1;
3620         aom_usec_timer_start(&partition_timer);
3621         partition_timer_on = 1;
3622       }
3623     }
3624 #endif
3625     found_best_partition |= rd_test_partition3(
3626         cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticalb,
3627         ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_B, mi_row, mi_col,
3628         subsize, mi_row, mi_col + mi_step, bsize2, mi_row + mi_step,
3629         mi_col + mi_step, bsize2);
3630 #if CONFIG_COLLECT_PARTITION_STATS
3631     if (partition_timer_on) {
3632       aom_usec_timer_mark(&partition_timer);
3633       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3634       partition_times[PARTITION_VERT_B] += time;
3635       partition_timer_on = 0;
3636     }
3637 #endif
3638     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3639   }
3640 
3641   // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
3642   // PARTITION_VERT_4 for this block. This is almost the same as
3643   // ext_partition_allowed, except that we don't allow 128x32 or 32x128
3644   // blocks, so we require that bsize is not BLOCK_128X128.
3645   const int partition4_allowed = cpi->oxcf.enable_1to4_partitions &&
3646                                  ext_partition_allowed &&
3647                                  bsize != BLOCK_128X128;
3648 
3649   int partition_horz4_allowed =
3650       partition4_allowed && partition_horz_allowed &&
3651       get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4), xss,
3652                            yss) != BLOCK_INVALID;
3653   int partition_vert4_allowed =
3654       partition4_allowed && partition_vert_allowed &&
3655       get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4), xss,
3656                            yss) != BLOCK_INVALID;
3657   if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) {
3658     partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3659                                 pc_tree->partitioning == PARTITION_HORZ_A ||
3660                                 pc_tree->partitioning == PARTITION_HORZ_B ||
3661                                 pc_tree->partitioning == PARTITION_SPLIT ||
3662                                 pc_tree->partitioning == PARTITION_NONE);
3663     partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3664                                 pc_tree->partitioning == PARTITION_VERT_A ||
3665                                 pc_tree->partitioning == PARTITION_VERT_B ||
3666                                 pc_tree->partitioning == PARTITION_SPLIT ||
3667                                 pc_tree->partitioning == PARTITION_NONE);
3668   }
3669   if (cpi->sf.part_sf.ml_prune_4_partition && partition4_allowed &&
3670       partition_horz_allowed && partition_vert_allowed) {
3671     av1_ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning,
3672                              best_rdc.rdcost, horz_rd, vert_rd, split_rd,
3673                              &partition_horz4_allowed, &partition_vert4_allowed,
3674                              pb_source_variance, mi_row, mi_col);
3675   }
3676 
3677   if (blksize < (min_partition_size << 2)) {
3678     partition_horz4_allowed = 0;
3679     partition_vert4_allowed = 0;
3680   }
3681 
3682   if (cpi->sf.part_sf.prune_4_partition_using_split_info &&
3683       (partition_horz4_allowed || partition_vert4_allowed)) {
3684     // Count of child blocks in which HORZ or VERT partition has won
3685     int num_child_horz_win = 0, num_child_vert_win = 0;
3686     for (int idx = 0; idx < 4; idx++) {
3687       num_child_horz_win += (split_part_rect_win[idx].horz_win) ? 1 : 0;
3688       num_child_vert_win += (split_part_rect_win[idx].vert_win) ? 1 : 0;
3689     }
3690 
3691     // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of
3692     // split partitions.
3693     // Conservative pruning for high quantizers
3694     const int num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3);
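    // For example, at qindex 0 the threshold is AOMMIN(3 * 255 / 255 + 1, 3) =
    // 3, while at qindex MAXQ it relaxes to AOMMIN(0 + 1, 3) = 1, so fewer
    // child wins are needed to keep HORZ4/VERT4 at high quantizers.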
3695     if (num_child_horz_win < num_win_thresh) {
3696       partition_horz4_allowed = 0;
3697     }
3698     if (num_child_vert_win < num_win_thresh) {
3699       partition_vert4_allowed = 0;
3700     }
3701   }
3702 
3703   // PARTITION_HORZ_4
3704   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
3705   if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
3706       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3707       !is_gt_max_sq_part) {
3708     av1_init_rd_stats(&sum_rdc);
3709     const int quarter_step = mi_size_high[bsize] / 4;
3710     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
3711 
3712     subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
3713     sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
3714     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3715 
3716 #if CONFIG_COLLECT_PARTITION_STATS
3717     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3718       partition_attempts[PARTITION_HORZ_4] += 1;
3719       aom_usec_timer_start(&partition_timer);
3720       partition_timer_on = 1;
3721     }
3722 #endif
3723     for (int i = 0; i < 4; ++i) {
3724       const int this_mi_row = mi_row + i * quarter_step;
3725 
3726       if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
3727 
3728       PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
3729 
3730       ctx_this->rd_mode_is_ready = 0;
3731       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
3732                            mi_col, subsize, best_rdc, &sum_rdc,
3733                            PARTITION_HORZ_4, ctx_prev, ctx_this)) {
3734         av1_invalid_rd_stats(&sum_rdc);
3735         break;
3736       }
3737 
3738       ctx_prev = ctx_this;
3739     }
3740 
3741     av1_rd_cost_update(x->rdmult, &sum_rdc);
3742     if (sum_rdc.rdcost < best_rdc.rdcost) {
3743       best_rdc = sum_rdc;
3744       found_best_partition = true;
3745       pc_tree->partitioning = PARTITION_HORZ_4;
3746     }
3747 
3748 #if CONFIG_COLLECT_PARTITION_STATS
3749     if (partition_timer_on) {
3750       aom_usec_timer_mark(&partition_timer);
3751       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3752       partition_times[PARTITION_HORZ_4] += time;
3753       partition_timer_on = 0;
3754     }
3755 #endif
3756     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3757   }
3758 
3759   // PARTITION_VERT_4
3760   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
3761   if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
3762       (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step)) &&
3763       !is_gt_max_sq_part) {
3764     av1_init_rd_stats(&sum_rdc);
3765     const int quarter_step = mi_size_wide[bsize] / 4;
3766     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
3767 
3768     subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
3769     sum_rdc.rate = partition_cost[PARTITION_VERT_4];
3770     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3771 
3772 #if CONFIG_COLLECT_PARTITION_STATS
3773     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3774       partition_attempts[PARTITION_VERT_4] += 1;
3775       aom_usec_timer_start(&partition_timer);
3776       partition_timer_on = 1;
3777     }
3778 #endif
3779     for (int i = 0; i < 4; ++i) {
3780       const int this_mi_col = mi_col + i * quarter_step;
3781 
3782       if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
3783 
3784       PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
3785 
3786       ctx_this->rd_mode_is_ready = 0;
3787       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
3788                            this_mi_col, subsize, best_rdc, &sum_rdc,
3789                            PARTITION_VERT_4, ctx_prev, ctx_this)) {
3790         av1_invalid_rd_stats(&sum_rdc);
3791         break;
3792       }
3793 
3794       ctx_prev = ctx_this;
3795     }
3796 
3797     av1_rd_cost_update(x->rdmult, &sum_rdc);
3798     if (sum_rdc.rdcost < best_rdc.rdcost) {
3799       best_rdc = sum_rdc;
3800       found_best_partition = true;
3801       pc_tree->partitioning = PARTITION_VERT_4;
3802     }
3803 #if CONFIG_COLLECT_PARTITION_STATS
3804     if (partition_timer_on) {
3805       aom_usec_timer_mark(&partition_timer);
3806       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3807       partition_times[PARTITION_VERT_4] += time;
3808       partition_timer_on = 0;
3809     }
3810 #endif
3811     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3812   }
3813 
3814   if (bsize == cm->seq_params.sb_size && !found_best_partition) {
3815     // Did not find a valid partition; go back and search again with fewer
3816     // constraints on which partition types to search.
3817     x->must_find_valid_partition = 1;
3818 #if CONFIG_COLLECT_PARTITION_STATS == 2
3819     part_stats->partition_redo += 1;
3820 #endif
3821     goto BEGIN_PARTITION_SEARCH;
3822   }
3823 
3824   *rd_cost = best_rdc;
3825 
3826 #if CONFIG_COLLECT_PARTITION_STATS
3827   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
3828     partition_decisions[pc_tree->partitioning] += 1;
3829   }
3830 #endif
3831 
3832 #if CONFIG_COLLECT_PARTITION_STATS == 1
3833   // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
3834   // prediction block
3835   FILE *f = fopen("data.csv", "a");
3836   fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
3837   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3838     fprintf(f, "%d,", partition_decisions[idx]);
3839   }
3840   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3841     fprintf(f, "%d,", partition_attempts[idx]);
3842   }
3843   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3844     fprintf(f, "%ld,", partition_times[idx]);
3845   }
3846   fprintf(f, "\n");
3847   fclose(f);
3848 #endif
3849 
3850 #if CONFIG_COLLECT_PARTITION_STATS == 2
3851   // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for the
3852   // whole clip, so we need to pass the information upstream to the encoder.
3853   const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
3854   int *agg_attempts = part_stats->partition_attempts[bsize_idx];
3855   int *agg_decisions = part_stats->partition_decisions[bsize_idx];
3856   int64_t *agg_times = part_stats->partition_times[bsize_idx];
3857   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3858     agg_attempts[idx] += partition_attempts[idx];
3859     agg_decisions[idx] += partition_decisions[idx];
3860     agg_times[idx] += partition_times[idx];
3861   }
3862 #endif
3863 
3864   if (found_best_partition && pc_tree->index != 3) {
3865     if (bsize == cm->seq_params.sb_size) {
3866       const int emit_output = multi_pass_mode != SB_DRY_PASS;
3867       const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL;
3868 
3869       x->cb_offset = 0;
3870       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
3871                 pc_tree, NULL);
3872     } else {
3873       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
3874                 pc_tree, NULL);
3875     }
3876   }
3877 
3878   if (bsize == cm->seq_params.sb_size) {
3879     assert(best_rdc.rate < INT_MAX);
3880     assert(best_rdc.dist < INT64_MAX);
3881   } else {
3882     assert(tp_orig == *tp);
3883   }
3884 
3885   x->rdmult = orig_rdmult;
3886   return found_best_partition;
3887 }
3888 #endif  // !CONFIG_REALTIME_ONLY
3889 #undef NUM_SIMPLE_MOTION_FEATURES
3890 
3891 #if !CONFIG_REALTIME_ONLY
3892 
3893 static int get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int analysis_type,
3894                             int mi_row, int mi_col, int orig_rdmult) {
3895   AV1_COMMON *const cm = &cpi->common;
3896   assert(IMPLIES(cpi->gf_group.size > 0,
3897                  cpi->gf_group.index < cpi->gf_group.size));
3898   const int tpl_idx = cpi->gf_group.index;
3899   TplParams *const tpl_data = &cpi->tpl_data;
3900   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
3901   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
3902   const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
3903   int tpl_stride = tpl_frame->stride;
3904   int64_t intra_cost = 0;
3905   int64_t mc_dep_cost = 0;
3906   const int mi_wide = mi_size_wide[bsize];
3907   const int mi_high = mi_size_high[bsize];
3908 
3909   if (tpl_frame->is_valid == 0) return orig_rdmult;
3910 
3911   if (!is_frame_tpl_eligible(cpi)) return orig_rdmult;
3912 
3913   if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
3914 
3915   int64_t mc_count = 0, mc_saved = 0;
3916   int mi_count = 0;
3917   const int mi_col_sr =
3918       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
3919   const int mi_col_end_sr =
3920       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
3921   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
3922   const int step = 1 << block_mis_log2;
3923   for (int row = mi_row; row < mi_row + mi_high; row += step) {
3924     for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
3925       if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
3926       TplDepStats *this_stats =
3927           &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
3928       int64_t mc_dep_delta =
3929           RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
3930                  this_stats->mc_dep_dist);
3931       intra_cost += this_stats->recrf_dist << RDDIV_BITS;
3932       mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
3933       mc_count += this_stats->mc_count;
3934       mc_saved += this_stats->mc_saved;
3935       mi_count++;
3936     }
3937   }
3938 
3939   aom_clear_system_state();
3940 
3941   double beta = 1.0;
3942   if (analysis_type == 0) {
3943     if (mc_dep_cost > 0 && intra_cost > 0) {
3944       const double r0 = cpi->rd.r0;
3945       const double rk = (double)intra_cost / mc_dep_cost;
3946       beta = (r0 / rk);
3947     }
3948   } else if (analysis_type == 1) {
3949     const double mc_count_base = (mi_count * cpi->rd.mc_count_base);
3950     beta = (mc_count + 1.0) / (mc_count_base + 1.0);
3951     beta = pow(beta, 0.5);
3952   } else if (analysis_type == 2) {
3953     const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base);
3954     beta = (mc_saved + 1.0) / (mc_saved_base + 1.0);
3955     beta = pow(beta, 0.5);
3956   }
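  // Informally: with analysis_type 0, beta = r0 / rk exceeds 1 when this
  // block's ratio rk = intra_cost / mc_dep_cost falls below the frame-level
  // r0, i.e. its TPL propagation cost is relatively large; the other two
  // types compare mc_count / mc_saved against their frame-level baselines.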
3957 
3958   int rdmult = av1_get_adaptive_rdmult(cpi, beta);
3959 
3960   aom_clear_system_state();
3961 
3962   rdmult = AOMMIN(rdmult, orig_rdmult * 3 / 2);
3963   rdmult = AOMMAX(rdmult, orig_rdmult * 1 / 2);
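  // E.g. with orig_rdmult = 100 the adaptive rdmult is confined to [50, 150].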
3964 
3965   rdmult = AOMMAX(1, rdmult);
3966 
3967   return rdmult;
3968 }
3969 
3970 static int get_tpl_stats_b(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
3971                            int mi_col, int64_t *intra_cost_b,
3972                            int64_t *inter_cost_b,
3973                            int_mv mv_b[][INTER_REFS_PER_FRAME], int *stride) {
3974   if (!cpi->oxcf.enable_tpl_model) return 0;
3975   if (cpi->superres_mode != SUPERRES_NONE) return 0;
3976   if (cpi->common.current_frame.frame_type == KEY_FRAME) return 0;
3977   const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
3978   if (update_type == INTNL_OVERLAY_UPDATE || update_type == OVERLAY_UPDATE)
3979     return 0;
3980   assert(IMPLIES(cpi->gf_group.size > 0,
3981                  cpi->gf_group.index < cpi->gf_group.size));
3982 
3983   AV1_COMMON *const cm = &cpi->common;
3984   const int gf_group_index = cpi->gf_group.index;
3985   TplParams *const tpl_data = &cpi->tpl_data;
3986   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_group_index];
3987   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
3988   int tpl_stride = tpl_frame->stride;
3989   const int mi_wide = mi_size_wide[bsize];
3990   const int mi_high = mi_size_high[bsize];
3991 
3992   if (tpl_frame->is_valid == 0) return 0;
3993   if (gf_group_index >= MAX_LAG_BUFFERS) return 0;
3994 
3995   int mi_count = 0;
3996   int count = 0;
3997   const int mi_col_sr =
3998       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
3999   const int mi_col_end_sr =
4000       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
4001   // mi_cols_sr is mi_cols in the superres case.
4002   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
4003 
4004   // The TPL store unit size is not the same as the motion estimation unit
4005   // size. Always use the motion estimation size here to avoid fetching
4006   // repetitive inter/intra costs.
4007   const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
4008   const int step = mi_size_wide[tpl_bsize];
4009   assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]);
4010 
4011   // Stride is only based on SB size, and we fill in values for every 16x16
4012   // block in a SB.
4013   *stride = (mi_col_end_sr - mi_col_sr) / step;
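  // E.g. a 128-wide superblock spans 32 mi units; with the 16x16 TPL unit
  // noted above (step = 4 mi), the stride is 8 entries per row.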
4014 
4015   for (int row = mi_row; row < mi_row + mi_high; row += step) {
4016     for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
4017       // Handle partial SB, so that no invalid values are used later.
4018       if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) {
4019         inter_cost_b[count] = INT64_MAX;
4020         intra_cost_b[count] = INT64_MAX;
4021         for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
4022           mv_b[count][i].as_int = INVALID_MV;
4023         }
4024         count++;
4025         continue;
4026       }
4027 
4028       TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
4029           row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
4030       inter_cost_b[count] = this_stats->inter_cost;
4031       intra_cost_b[count] = this_stats->intra_cost;
4032       memcpy(mv_b[count], this_stats->mv, sizeof(this_stats->mv));
4033       mi_count++;
4034       count++;
4035     }
4036   }
4037 
4038   return mi_count;
4039 }
4040 
4041 // analysis_type 0: Use mc_dep_cost and intra_cost
4042 // analysis_type 1: Use count of best inter predictor chosen
4043 // analysis_type 2: Use cost reduction from intra to inter for best inter
4044 //                  predictor chosen
4045 static int get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
4046                                       int mi_row, int mi_col) {
4047   AV1_COMMON *const cm = &cpi->common;
4048   assert(IMPLIES(cpi->gf_group.size > 0,
4049                  cpi->gf_group.index < cpi->gf_group.size));
4050   const int tpl_idx = cpi->gf_group.index;
4051   TplParams *const tpl_data = &cpi->tpl_data;
4052   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
4053   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4054   const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
4055   int tpl_stride = tpl_frame->stride;
4056   int64_t intra_cost = 0;
4057   int64_t mc_dep_cost = 0;
4058   const int mi_wide = mi_size_wide[bsize];
4059   const int mi_high = mi_size_high[bsize];
4060   const int base_qindex = cm->quant_params.base_qindex;
4061 
4062   if (tpl_frame->is_valid == 0) return base_qindex;
4063 
4064   if (!is_frame_tpl_eligible(cpi)) return base_qindex;
4065 
4066   if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return base_qindex;
4067 
4068   int64_t mc_count = 0, mc_saved = 0;
4069   int mi_count = 0;
4070   const int mi_col_sr =
4071       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
4072   const int mi_col_end_sr =
4073       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
4074   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
4075   const int step = 1 << block_mis_log2;
4076   for (int row = mi_row; row < mi_row + mi_high; row += step) {
4077     for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
4078       if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
4079       TplDepStats *this_stats =
4080           &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
4081       int64_t mc_dep_delta =
4082           RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
4083                  this_stats->mc_dep_dist);
4084       intra_cost += this_stats->recrf_dist << RDDIV_BITS;
4085       mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
4086       mc_count += this_stats->mc_count;
4087       mc_saved += this_stats->mc_saved;
4088       mi_count++;
4089     }
4090   }
4091 
4092   aom_clear_system_state();
4093 
4094   int offset = 0;
4095   double beta = 1.0;
4096   if (mc_dep_cost > 0 && intra_cost > 0) {
4097     const double r0 = cpi->rd.r0;
4098     const double rk = (double)intra_cost / mc_dep_cost;
4099     beta = (r0 / rk);
4100     assert(beta > 0.0);
4101   }
4102   offset = av1_get_deltaq_offset(cpi, base_qindex, beta);
4103   aom_clear_system_state();
4104 
4105   const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4106   offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
4107   offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
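  // E.g. with delta_q_res = 4 the offset is confined to [-35, 35], i.e. within
  // 9 delta-q steps of the base qindex.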
4108   int qindex = cm->quant_params.base_qindex + offset;
4109   qindex = AOMMIN(qindex, MAXQ);
4110   qindex = AOMMAX(qindex, MINQ);
4111 
4112   return qindex;
4113 }
4114 
4115 static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
4116                                      MACROBLOCK *const x,
4117                                      const TileInfo *const tile_info,
4118                                      int mi_row, int mi_col, int num_planes) {
4119   AV1_COMMON *const cm = &cpi->common;
4120   const CommonModeInfoParams *const mi_params = &cm->mi_params;
4121   const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4122   assert(delta_q_info->delta_q_present_flag);
4123 
4124   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4125   // Delta-q modulation based on variance
4126   av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
4127 
4128   int current_qindex = cm->quant_params.base_qindex;
4129   if (cpi->oxcf.deltaq_mode == DELTA_Q_PERCEPTUAL) {
4130     if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
4131       const int block_wavelet_energy_level =
4132           av1_block_wavelet_energy_level(cpi, x, sb_size);
4133       x->sb_energy_level = block_wavelet_energy_level;
4134       current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
4135           cpi, block_wavelet_energy_level);
4136     } else {
4137       const int block_var_level = av1_log_block_var(cpi, x, sb_size);
4138       x->sb_energy_level = block_var_level;
4139       current_qindex =
4140           av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
4141     }
4142   } else if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE &&
4143              cpi->oxcf.enable_tpl_model) {
4144     // Setup deltaq based on tpl stats
4145     current_qindex = get_q_for_deltaq_objective(cpi, sb_size, mi_row, mi_col);
4146   }
4147 
4148   const int delta_q_res = delta_q_info->delta_q_res;
4149   // Right now AQ only works with the TPL model, so if TPL is disabled we set
4150   // current_qindex to base_qindex.
4151   if (cpi->oxcf.enable_tpl_model && cpi->oxcf.deltaq_mode != NO_DELTA_Q) {
4152     current_qindex =
4153         clamp(current_qindex, delta_q_res, 256 - delta_q_info->delta_q_res);
4154   } else {
4155     current_qindex = cm->quant_params.base_qindex;
4156   }
4157 
4158   MACROBLOCKD *const xd = &x->e_mbd;
4159   const int sign_deltaq_index =
4160       current_qindex - xd->current_qindex >= 0 ? 1 : -1;
4161   const int deltaq_deadzone = delta_q_res / 4;
4162   const int qmask = ~(delta_q_res - 1);
4163   int abs_deltaq_index = abs(current_qindex - xd->current_qindex);
4164   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
4165   current_qindex = xd->current_qindex + sign_deltaq_index * abs_deltaq_index;
4166   current_qindex = AOMMAX(current_qindex, MINQ + 1);
4167   assert(current_qindex > 0);
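  // Worked example: with delta_q_res = 4 (deadzone 1, qmask = ~3), moving from
  // xd->current_qindex = 60 towards a target of 67 gives abs_deltaq_index =
  // (7 + 1) & ~3 = 8, so the coded qindex becomes 68 -- always a multiple of
  // delta_q_res away from the previous superblock's qindex.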
4168 
4169   xd->delta_qindex = current_qindex - cm->quant_params.base_qindex;
4170   set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4171   xd->mi[0]->current_qindex = current_qindex;
4172   av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
4173 
4174   // keep track of any non-zero delta-q used
4175   td->deltaq_used |= (xd->delta_qindex != 0);
4176 
4177   if (cpi->oxcf.deltalf_mode) {
4178     const int delta_lf_res = delta_q_info->delta_lf_res;
4179     const int lfmask = ~(delta_lf_res - 1);
4180     const int delta_lf_from_base =
4181         ((xd->delta_qindex / 2 + delta_lf_res / 2) & lfmask);
4182     const int8_t delta_lf =
4183         (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
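    // E.g. with xd->delta_qindex = 8 and delta_lf_res = 2, delta_lf_from_base
    // = (4 + 1) & ~1 = 4: roughly half the qindex delta, snapped to a multiple
    // of delta_lf_res.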
4184     const int frame_lf_count =
4185         av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
4186     const int mib_size = cm->seq_params.mib_size;
4187 
4188     // Pre-set the delta lf for the loop filter. Note that this value is set
4189     // before mi is assigned for each block in the current superblock.
4190     for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
4191       for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
4192         const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
4193         mi_params->mi_grid_base[grid_idx]->delta_lf_from_base = delta_lf;
4194         for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
4195           mi_params->mi_grid_base[grid_idx]->delta_lf[lf_id] = delta_lf;
4196         }
4197       }
4198     }
4199   }
4200 }
4201 #endif  // !CONFIG_REALTIME_ONLY
4202 
4203 #define AVG_CDF_WEIGHT_LEFT 3
4204 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
4205 
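// With the weights defined above (wt_left = AVG_CDF_WEIGHT_LEFT = 3,
// wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT = 1), each CDF entry below becomes
// (3 * left + tr + 2) / 4: a rounded weighted average biased towards the
// left superblock's statistics.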
4206 static AOM_INLINE void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left,
4207                                       aom_cdf_prob *cdf_ptr_tr, int num_cdfs,
4208                                       int cdf_stride, int nsymbs, int wt_left,
4209                                       int wt_tr) {
4210   for (int i = 0; i < num_cdfs; i++) {
4211     for (int j = 0; j <= nsymbs; j++) {
4212       cdf_ptr_left[i * cdf_stride + j] =
4213           (aom_cdf_prob)(((int)cdf_ptr_left[i * cdf_stride + j] * wt_left +
4214                           (int)cdf_ptr_tr[i * cdf_stride + j] * wt_tr +
4215                           ((wt_left + wt_tr) / 2)) /
4216                          (wt_left + wt_tr));
4217       assert(cdf_ptr_left[i * cdf_stride + j] >= 0 &&
4218              cdf_ptr_left[i * cdf_stride + j] < CDF_PROB_TOP);
4219     }
4220   }
4221 }
4222 
4223 #define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
4224   AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))
4225 
4226 #define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride)           \
4227   do {                                                                     \
4228     aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left;               \
4229     aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr;                   \
4230     int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob);       \
4231     int num_cdfs = array_size / cdf_stride;                                \
4232     avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \
4233                    wt_left, wt_tr);                                        \
4234   } while (0)
4235 
4236 static AOM_INLINE void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr,
4237                                int wt_left, int wt_tr) {
4238   AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);
4239   for (int i = 0; i < 2; i++) {
4240     AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf,
4241                 MV_CLASSES);
4242     AVERAGE_CDF(nmv_left->comps[i].class0_fp_cdf,
4243                 nmv_tr->comps[i].class0_fp_cdf, MV_FP_SIZE);
4244     AVERAGE_CDF(nmv_left->comps[i].fp_cdf, nmv_tr->comps[i].fp_cdf, MV_FP_SIZE);
4245     AVERAGE_CDF(nmv_left->comps[i].sign_cdf, nmv_tr->comps[i].sign_cdf, 2);
4246     AVERAGE_CDF(nmv_left->comps[i].class0_hp_cdf,
4247                 nmv_tr->comps[i].class0_hp_cdf, 2);
4248     AVERAGE_CDF(nmv_left->comps[i].hp_cdf, nmv_tr->comps[i].hp_cdf, 2);
4249     AVERAGE_CDF(nmv_left->comps[i].class0_cdf, nmv_tr->comps[i].class0_cdf,
4250                 CLASS0_SIZE);
4251     AVERAGE_CDF(nmv_left->comps[i].bits_cdf, nmv_tr->comps[i].bits_cdf, 2);
4252   }
4253 }
4254 
4255 // With row-based multi-threading of the encoder, since we always keep a
4256 // top-right sync, we can average the top-right SB's CDFs and the left SB's
4257 // CDFs and use them for the current SB's encoding to improve performance.
4258 // This function facilitates the averaging of CDFs and is used only when
4259 // row-mt is enabled in the encoder.
4260 static AOM_INLINE void avg_cdf_symbols(FRAME_CONTEXT *ctx_left,
4261                                        FRAME_CONTEXT *ctx_tr, int wt_left,
4262                                        int wt_tr) {
4263   AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
4264   AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
4265   AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
4266   AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
4267   AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
4268   AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
4269   AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
4270   AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
4271   AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
4272   AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
4273   AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
4274   AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
4275   AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE);
4276   AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2);
4277   AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2);
4278   AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2);
4279   AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2);
4280   AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
4281               ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
4282   AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf,
4283               MASKED_COMPOUND_TYPES);
4284   AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16);
4285   AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2);
4286   AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2);
4287   AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf,
4288               INTERINTRA_MODES);
4289   AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES);
4290   AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2);
4291   AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf,
4292               PALETTE_SIZES);
4293   AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf,
4294               PALETTE_SIZES);
4295   for (int j = 0; j < PALETTE_SIZES; j++) {
4296     int nsymbs = j + PALETTE_MIN_SIZE;
4297     AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j],
4298                    ctx_tr->palette_y_color_index_cdf[j], nsymbs,
4299                    CDF_SIZE(PALETTE_COLORS));
4300     AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j],
4301                    ctx_tr->palette_uv_color_index_cdf[j], nsymbs,
4302                    CDF_SIZE(PALETTE_COLORS));
4303   }
4304   AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2);
4305   AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2);
4306   AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2);
4307   AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2);
4308   AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2);
4309   AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2);
4310   AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2);
4311   AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2);
4312   AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2);
4313   AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2);
4314   AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
4315   AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
4316   AVERAGE_CDF(ctx_left->skip_cdfs, ctx_tr->skip_cdfs, 2);
4317   AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
4318   avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
4319   avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
4320   AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
4321   AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS);
4322   AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
4323   AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
4324               ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
4325   AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2);
4326   AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf,
4327               FILTER_INTRA_MODES);
4328   AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf,
4329               RESTORE_SWITCHABLE_TYPES);
4330   AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2);
4331   AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2);
4332   AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES);
4333   AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
4334                  UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
4335   AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
4336   for (int i = 0; i < PARTITION_CONTEXTS; i++) {
4337     if (i < 4) {
4338       AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4,
4339                      CDF_SIZE(10));
4340     } else if (i < 16) {
4341       AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10);
4342     } else {
4343       AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8,
4344                      CDF_SIZE(10));
4345     }
4346   }
4347   AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf,
4348               SWITCHABLE_FILTERS);
4349   AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES);
4350   AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf,
4351               2 * MAX_ANGLE_DELTA + 1);
4352   AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH,
4353                  CDF_SIZE(MAX_TX_DEPTH + 1));
4354   AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1],
4355               MAX_TX_DEPTH + 1);
4356   AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2],
4357               MAX_TX_DEPTH + 1);
4358   AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3],
4359               MAX_TX_DEPTH + 1);
4360   AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1);
4361   AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1);
4362   for (int i = 0; i < FRAME_LF_COUNT; i++) {
4363     AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i],
4364                 DELTA_LF_PROBS + 1);
4365   }
4366   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7,
4367                  CDF_SIZE(TX_TYPES));
4368   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5,
4369                  CDF_SIZE(TX_TYPES));
4370   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
4371                  CDF_SIZE(TX_TYPES));
4372   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
4373                  CDF_SIZE(TX_TYPES));
4374   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2,
4375                  CDF_SIZE(TX_TYPES));
4376   AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS);
4377   AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf,
4378               CFL_ALPHABET_SIZE);
4379 }
4380 
4381 #if !CONFIG_REALTIME_ONLY
4382 static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
4383                                                int mi_row, int mi_col) {
4384   const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
4385   const int orig_rdmult = cpi->rd.RDMULT;
4386 
4387   assert(IMPLIES(cpi->gf_group.size > 0,
4388                  cpi->gf_group.index < cpi->gf_group.size));
4389   const int gf_group_index = cpi->gf_group.index;
4390   if (cpi->oxcf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ &&
4391       cpi->oxcf.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
4392       cpi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
4393     const int dr =
4394         get_rdmult_delta(cpi, sb_size, 0, mi_row, mi_col, orig_rdmult);
4395     x->rdmult = dr;
4396   }
4397 }
4398 #endif
4399 
4400 static void source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int shift) {
4401   unsigned int tmp_sse;
4402   unsigned int tmp_variance;
4403   const BLOCK_SIZE bsize = BLOCK_64X64;
4404   uint8_t *src_y = cpi->source->y_buffer;
4405   int src_ystride = cpi->source->y_stride;
4406   uint8_t *last_src_y = cpi->last_source->y_buffer;
4407   int last_src_ystride = cpi->last_source->y_stride;
4408   uint64_t avg_source_sse_threshold = 100000;        // ~5*5*(64*64)
4409   uint64_t avg_source_sse_threshold_high = 1000000;  // ~15*15*(64*64)
4410   uint64_t sum_sq_thresh = 10000;  // sum = sqrt(thresh / (64*64)) ~1.5
4411 #if CONFIG_AV1_HIGHBITDEPTH
4412   MACROBLOCKD *xd = &x->e_mbd;
4413   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) return;
4414 #endif
4415   src_y += shift;
4416   last_src_y += shift;
4417   tmp_variance = cpi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
4418                                        last_src_ystride, &tmp_sse);
4419   // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
4420   // Detect large lighting change.
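  // E.g. a uniform brightness step of 2 gray levels over the 64x64 block gives
  // sum = 2 * 4096 = 8192 and (sum * sum) >> 12 = 16384 > sum_sq_thresh, while
  // the variance of the difference stays near 0.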
4421   if (tmp_variance < (tmp_sse >> 1) && (tmp_sse - tmp_variance) > sum_sq_thresh)
4422     x->content_state_sb = kLowVarHighSumdiff;
4423   else if (tmp_sse < avg_source_sse_threshold)
4424     x->content_state_sb = kLowSad;
4425   else if (tmp_sse > avg_source_sse_threshold_high)
4426     x->content_state_sb = kHighSad;
4427 }
4428 
4429 static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
4430                                        TileDataEnc *tile_data,
4431                                        PC_TREE *const pc_root, TOKENEXTRA **tp,
4432                                        const int mi_row, const int mi_col,
4433                                        const int seg_skip) {
4434   AV1_COMMON *const cm = &cpi->common;
4435   MACROBLOCK *const x = &td->mb;
4436   const SPEED_FEATURES *const sf = &cpi->sf;
4437   const TileInfo *const tile_info = &tile_data->tile_info;
4438   MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
4439                       get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
4440   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4441   if (sf->rt_sf.source_metrics_sb_nonrd && sb_size == BLOCK_64X64 &&
4442       cpi->svc.number_spatial_layers <= 1 &&
4443       cm->current_frame.frame_type != KEY_FRAME) {
4444     int shift = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
4445     source_content_sb(cpi, x, shift);
4446   }
4447   if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
4448     set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4449     const BLOCK_SIZE bsize =
4450         seg_skip ? sb_size : sf->part_sf.always_this_block_size;
4451     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4452   } else if (cpi->partition_search_skippable_frame) {
4453     set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4454     const BLOCK_SIZE bsize =
4455         get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4456     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4457   } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
4458     set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, sb_size);
4459     av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
4460   }
4461   assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
4462          cpi->partition_search_skippable_frame ||
4463          sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
4464   td->mb.cb_offset = 0;
4465   nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4466                       pc_root);
4467 }
4468 
4469 // Memset the mbmis in the current superblock to 0.
4470 static INLINE void reset_mbmi(CommonModeInfoParams *const mi_params,
4471                               BLOCK_SIZE sb_size, int mi_row, int mi_col) {
4472   // size of sb in unit of mi (BLOCK_4X4)
4473   const int sb_size_mi = mi_size_wide[sb_size];
4474   const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize];
4475   // size of sb in unit of allocated mi size
4476   const int sb_size_alloc_mi = mi_size_wide[sb_size] / mi_alloc_size_1d;
4477   assert(mi_params->mi_alloc_stride % sb_size_alloc_mi == 0 &&
4478          "mi is not allocated as a multiple of sb!");
4479   assert(mi_params->mi_stride % sb_size_mi == 0 &&
4480          "mi_grid_base is not allocated as a multiple of sb!");
4481 
4482   const int mi_rows = mi_size_high[sb_size];
4483   for (int cur_mi_row = 0; cur_mi_row < mi_rows; cur_mi_row++) {
4484     assert(get_mi_grid_idx(mi_params, 0, mi_col + mi_alloc_size_1d) <
4485            mi_params->mi_stride);
4486     const int mi_grid_idx =
4487         get_mi_grid_idx(mi_params, mi_row + cur_mi_row, mi_col);
4488     const int alloc_mi_idx =
4489         get_alloc_mi_idx(mi_params, mi_row + cur_mi_row, mi_col);
4490     memset(&mi_params->mi_grid_base[mi_grid_idx], 0,
4491            sb_size_mi * sizeof(*mi_params->mi_grid_base));
4492     memset(&mi_params->tx_type_map[mi_grid_idx], 0,
4493            sb_size_mi * sizeof(*mi_params->tx_type_map));
4494     if (cur_mi_row % mi_alloc_size_1d == 0) {
4495       memset(&mi_params->mi_alloc[alloc_mi_idx], 0,
4496              sb_size_alloc_mi * sizeof(*mi_params->mi_alloc));
4497     }
4498   }
4499 }
4500 
4501 static INLINE void backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats,
4502                                    const AV1_COMP *cpi, ThreadData *td,
4503                                    const TileDataEnc *tile_data, int mi_row,
4504                                    int mi_col) {
4505   MACROBLOCK *x = &td->mb;
4506   MACROBLOCKD *xd = &x->e_mbd;
4507   const TileInfo *tile_info = &tile_data->tile_info;
4508 
4509   const AV1_COMMON *cm = &cpi->common;
4510   const int num_planes = av1_num_planes(cm);
4511   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4512 
4513   xd->above_txfm_context =
4514       cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
4515   xd->left_txfm_context =
4516       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
4517   save_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);
4518 
4519   sb_fp_stats->rd_count = cpi->td.rd_counts;
4520   sb_fp_stats->split_count = cpi->td.mb.txb_split_count;
4521 
4522   sb_fp_stats->fc = *td->counts;
4523 
4524   memcpy(sb_fp_stats->inter_mode_rd_models, tile_data->inter_mode_rd_models,
4525          sizeof(sb_fp_stats->inter_mode_rd_models));
4526 
4527   memcpy(sb_fp_stats->thresh_freq_fact, x->thresh_freq_fact,
4528          sizeof(sb_fp_stats->thresh_freq_fact));
4529 
4530   const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
4531   sb_fp_stats->current_qindex =
4532       cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
4533 
4534 #if CONFIG_INTERNAL_STATS
4535   memcpy(sb_fp_stats->mode_chosen_counts, cpi->mode_chosen_counts,
4536          sizeof(sb_fp_stats->mode_chosen_counts));
4537 #endif  // CONFIG_INTERNAL_STATS
4538 }
4539 
4540 static INLINE void restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats,
4541                                     AV1_COMP *cpi, ThreadData *td,
4542                                     TileDataEnc *tile_data, int mi_row,
4543                                     int mi_col) {
4544   MACROBLOCK *x = &td->mb;
4545 
4546   const AV1_COMMON *cm = &cpi->common;
4547   const int num_planes = av1_num_planes(cm);
4548   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4549 
4550   restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);
4551 
4552   cpi->td.rd_counts = sb_fp_stats->rd_count;
4553   cpi->td.mb.txb_split_count = sb_fp_stats->split_count;
4554 
4555   *td->counts = sb_fp_stats->fc;
4556 
4557   memcpy(tile_data->inter_mode_rd_models, sb_fp_stats->inter_mode_rd_models,
4558          sizeof(sb_fp_stats->inter_mode_rd_models));
4559   memcpy(x->thresh_freq_fact, sb_fp_stats->thresh_freq_fact,
4560          sizeof(sb_fp_stats->thresh_freq_fact));
4561 
4562   const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
4563   cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
4564       sb_fp_stats->current_qindex;
4565 
4566 #if CONFIG_INTERNAL_STATS
4567   memcpy(cpi->mode_chosen_counts, sb_fp_stats->mode_chosen_counts,
4568          sizeof(sb_fp_stats->mode_chosen_counts));
4569 #endif  // CONFIG_INTERNAL_STATS
4570 }
4571 
4572 #if !CONFIG_REALTIME_ONLY
4573 static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
4574                                  int mi_col) {
4575   const AV1_COMMON *cm = &cpi->common;
4576   const CommonModeInfoParams *const mi_params = &cm->mi_params;
4577   MACROBLOCK *x = &td->mb;
4578   const int frame_idx = cpi->gf_group.index;
4579   TplParams *const tpl_data = &cpi->tpl_data;
4580   TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
4581   const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
4582 
4583   av1_zero(x->search_ref_frame);
4584 
4585   if (tpl_frame->is_valid == 0) return;
4586   if (!is_frame_tpl_eligible(cpi)) return;
4587   if (frame_idx >= MAX_LAG_BUFFERS) return;
4588   if (cpi->superres_mode != SUPERRES_NONE) return;
4589   if (cpi->oxcf.aq_mode != NO_AQ) return;
4590 
4591   const int is_overlay = cpi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
4592   if (is_overlay) {
4593     memset(x->search_ref_frame, 1, sizeof(x->search_ref_frame));
4594     return;
4595   }
4596 
4597   TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4598   const int tpl_stride = tpl_frame->stride;
4599   int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
4600   const int step = 1 << block_mis_log2;
4601   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4602   const int mi_row_end =
4603       AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
4604   const int mi_col_end =
4605       AOMMIN(mi_size_wide[sb_size] + mi_col, mi_params->mi_cols);
4606 
4607   for (int row = mi_row; row < mi_row_end; row += step) {
4608     for (int col = mi_col; col < mi_col_end; col += step) {
4609       const TplDepStats *this_stats =
4610           &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
4611       int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
4612       // Find the winner ref frame idx for the current block
4613       int64_t best_inter_cost = this_stats->pred_error[0];
4614       int best_rf_idx = 0;
4615       for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
4616         if ((this_stats->pred_error[idx] < best_inter_cost) &&
4617             (this_stats->pred_error[idx] != 0)) {
4618           best_inter_cost = this_stats->pred_error[idx];
4619           best_rf_idx = idx;
4620         }
4621       }
4622       // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
4623       // LAST_FRAME.
4624       tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
4625                                     this_stats->pred_error[LAST_FRAME - 1];
4626 
4627       for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
4628         inter_cost[rf_idx] += tpl_pred_error[rf_idx];
4629     }
4630   }
4631 
4632   int rank_index[INTER_REFS_PER_FRAME - 1];
4633   for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
4634     rank_index[idx] = idx + 1;
4635     for (int i = idx; i > 0; --i) {
4636       if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
4637         const int tmp = rank_index[i - 1];
4638         rank_index[i - 1] = rank_index[i];
4639         rank_index[i] = tmp;
4640       }
4641     }
4642   }
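  // rank_index[] now holds the non-LAST reference indices sorted by aggregated
  // inter_cost (insertion sort), most useful (lowest cost) first.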
4643 
4644   x->search_ref_frame[INTRA_FRAME] = 1;
4645   x->search_ref_frame[LAST_FRAME] = 1;
4646 
4647   int cutoff_ref = 0;
4648   for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
4649     x->search_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
4650     if (idx > 2) {
4651       if (!cutoff_ref) {
4652         // If the predictive coding gain is smaller than that of the previous,
4653         // more relevant frame by a certain amount, discard this frame and all
4654         // the frames after it.
4655         if (llabs(inter_cost[rank_index[idx]]) <
4656                 llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
4657             inter_cost[rank_index[idx]] == 0)
4658           cutoff_ref = 1;
4659       }
4660 
4661       if (cutoff_ref) x->search_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
4662     }
4663   }
4664 }
4665 #endif  // !CONFIG_REALTIME_ONLY
4666 
4667 // This function initializes the stats for encode_rd_sb.
4668 static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
4669                                      const TileDataEnc *tile_data,
4670                                      PC_TREE *pc_root, RD_STATS *rd_cost,
4671                                      int mi_row, int mi_col,
4672                                      int gather_tpl_data) {
4673   const AV1_COMMON *cm = &cpi->common;
4674   const TileInfo *tile_info = &tile_data->tile_info;
4675   MACROBLOCK *x = &td->mb;
4676 
4677   const SPEED_FEATURES *sf = &cpi->sf;
4678   const int use_simple_motion_search =
4679       (sf->part_sf.simple_motion_search_split ||
4680        sf->part_sf.simple_motion_search_prune_rect ||
4681        sf->part_sf.simple_motion_search_early_term_none ||
4682        sf->part_sf.ml_early_term_after_part_split_level) &&
4683       !frame_is_intra_only(cm);
4684   if (use_simple_motion_search) {
4685     init_simple_motion_search_mvs(pc_root);
4686   }
4687 
4688 #if !CONFIG_REALTIME_ONLY
4689   init_ref_frame_space(cpi, td, mi_row, mi_col);
4690   x->sb_energy_level = 0;
4691   x->cnn_output_valid = 0;
4692   if (gather_tpl_data) {
4693     if (cm->delta_q_info.delta_q_present_flag) {
4694       const int num_planes = av1_num_planes(cm);
4695       const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4696       setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
4697       av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
4698     }
4699     if (cpi->oxcf.enable_tpl_model) {
4700       adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
4701     }
4702   }
4703 #else
4704   (void)tile_info;
4705   (void)mi_row;
4706   (void)mi_col;
4707   (void)gather_tpl_data;
4708 #endif
4709 
4710   // Reset hash state for transform/mode rd hash information
4711   reset_hash_records(x, cpi->sf.tx_sf.use_inter_txb_hash);
4712   av1_zero(x->picked_ref_frames_mask);
4713   av1_zero(x->pred_mv);
4714   av1_invalid_rd_stats(rd_cost);
4715 }
4716 
4717 static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
4718                                     TileDataEnc *tile_data,
4719                                     PC_TREE *const pc_root, TOKENEXTRA **tp,
4720                                     const int mi_row, const int mi_col,
4721                                     const int seg_skip) {
4722   AV1_COMMON *const cm = &cpi->common;
4723   MACROBLOCK *const x = &td->mb;
4724   const SPEED_FEATURES *const sf = &cpi->sf;
4725   const TileInfo *const tile_info = &tile_data->tile_info;
4726   MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
4727                       get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
4728   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4729   int dummy_rate;
4730   int64_t dummy_dist;
4731   RD_STATS dummy_rdc;
4732 
4733 #if CONFIG_REALTIME_ONLY
4734   (void)seg_skip;
4735 #endif  // CONFIG_REALTIME_ONLY
4736 
4737   init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col, 1);
4738 
4739   if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
4740     set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, sb_size);
4741     av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
4742     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4743                      &dummy_rate, &dummy_dist, 1, pc_root);
4744   }
4745 #if !CONFIG_REALTIME_ONLY
4746   else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
4747     set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4748     const BLOCK_SIZE bsize =
4749         seg_skip ? sb_size : sf->part_sf.always_this_block_size;
4750     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4751     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4752                      &dummy_rate, &dummy_dist, 1, pc_root);
4753   } else if (cpi->partition_search_skippable_frame) {
4754     set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4755     const BLOCK_SIZE bsize =
4756         get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4757     set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4758     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4759                      &dummy_rate, &dummy_dist, 1, pc_root);
4760   } else {
4761     // No TPL stats exist for overlay frames; key frames are also excluded.
4762     x->valid_cost_b =
4763         get_tpl_stats_b(cpi, sb_size, mi_row, mi_col, x->intra_cost_b,
4764                         x->inter_cost_b, x->mv_b, &x->cost_stride);
4765 
4766     reset_partition(pc_root, sb_size);
4767 
4768 #if CONFIG_COLLECT_COMPONENT_TIMING
4769     start_timing(cpi, rd_pick_partition_time);
4770 #endif
4771     BLOCK_SIZE max_sq_size = x->max_partition_size;
4772     BLOCK_SIZE min_sq_size = x->min_partition_size;
4773 
4774     if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
4775       float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
4776 
4777       av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
4778       max_sq_size = AOMMAX(
4779           AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size),
4780           min_sq_size);
4781     }
4782 
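    // Single-pass encoding is the normal path. The multipass path (exercised
    // by the sb_multipass_unit_test option) encodes the SB twice: a dry pass
    // gathers stats, the SB state is then rewound (init_encode_rd_sb,
    // reset_mbmi, reset_partition, restore_sb_state), and a wet pass
    // re-encodes the SB.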
4783     const int num_passes = cpi->oxcf.sb_multipass_unit_test ? 2 : 1;
4784 
4785     if (num_passes == 1) {
4786       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4787                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4788                         pc_root, NULL, SB_SINGLE_PASS, NULL);
4789     } else {
4790       // First pass
4791       SB_FIRST_PASS_STATS sb_fp_stats;
4792       backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
4793       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4794                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4795                         pc_root, NULL, SB_DRY_PASS, NULL);
4796 
4797       // Second pass
4798       init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col,
4799                         0);
4800       reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
4801       reset_partition(pc_root, sb_size);
4802 
4803       restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
4804 
4805       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4806                         max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4807                         pc_root, NULL, SB_WET_PASS, NULL);
4808     }
4809     // Reset to 0 so that it is not mistakenly used elsewhere.
4810     x->valid_cost_b = 0;
4811 #if CONFIG_COLLECT_COMPONENT_TIMING
4812     end_timing(cpi, rd_pick_partition_time);
4813 #endif
4814   }
4815 #endif  // !CONFIG_REALTIME_ONLY
4816 
4817   // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
4818   if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
4819       cm->tiles.cols == 1 && cm->tiles.rows == 1) {
4820     av1_inter_mode_data_fit(tile_data, x->rdmult);
4821   }
4822 }
4823 
4824 static AOM_INLINE void set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
4825                                          const TileInfo *const tile_info,
4826                                          const int mi_row, const int mi_col) {
4827   AV1_COMMON *const cm = &cpi->common;
4828   const int num_planes = av1_num_planes(cm);
4829   MACROBLOCK *const x = &td->mb;
4830   MACROBLOCKD *const xd = &x->e_mbd;
4831 
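  // The three switches below share one fall-through scheme: COST_UPD_TILE
  // refreshes costs only at the first SB of a tile, COST_UPD_SBROW at the
  // first SB of every SB row, and COST_UPD_SB at every SB (unless SB-level
  // updates are disabled by a speed feature). For example, with
  // COST_UPD_SBROW the coefficient costs are refilled exactly when
  // mi_col == tile_info->mi_col_start.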
4832   switch (cpi->oxcf.coeff_cost_upd_freq) {
4833     case COST_UPD_TILE:  // Tile level
4834       if (mi_row != tile_info->mi_row_start) break;
4835       AOM_FALLTHROUGH_INTENDED;
4836     case COST_UPD_SBROW:  // SB row level in tile
4837       if (mi_col != tile_info->mi_col_start) break;
4838       AOM_FALLTHROUGH_INTENDED;
4839     case COST_UPD_SB:  // SB level
4840       if (cpi->sf.inter_sf.disable_sb_level_coeff_cost_upd &&
4841           mi_col != tile_info->mi_col_start)
4842         break;
4843       av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
4844       break;
4845     default: assert(0);
4846   }
4847 
4848   switch (cpi->oxcf.mode_cost_upd_freq) {
4849     case COST_UPD_TILE:  // Tile level
4850       if (mi_row != tile_info->mi_row_start) break;
4851       AOM_FALLTHROUGH_INTENDED;
4852     case COST_UPD_SBROW:  // SB row level in tile
4853       if (mi_col != tile_info->mi_col_start) break;
4854       AOM_FALLTHROUGH_INTENDED;
4855     case COST_UPD_SB:  // SB level
4856       av1_fill_mode_rates(cm, x, xd->tile_ctx);
4857       break;
4858     default: assert(0);
4859   }
4860   switch (cpi->oxcf.mv_cost_upd_freq) {
4861     case COST_UPD_OFF: break;
4862     case COST_UPD_TILE:  // Tile level
4863       if (mi_row != tile_info->mi_row_start) break;
4864       AOM_FALLTHROUGH_INTENDED;
4865     case COST_UPD_SBROW:  // SB row level in tile
4866       if (mi_col != tile_info->mi_col_start) break;
4867       AOM_FALLTHROUGH_INTENDED;
4868     case COST_UPD_SB:  // SB level
4869       if (cpi->sf.inter_sf.disable_sb_level_mv_cost_upd &&
4870           mi_col != tile_info->mi_col_start)
4871         break;
4872       av1_fill_mv_costs(xd->tile_ctx, cm->features.cur_frame_force_integer_mv,
4873                         cm->features.allow_high_precision_mv, x);
4874       break;
4875     default: assert(0);
4876   }
4877 }
4878 
4879 static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
4880                                      TileDataEnc *tile_data, int mi_row,
4881                                      TOKENEXTRA **tp) {
4882   AV1_COMMON *const cm = &cpi->common;
4883   const TileInfo *const tile_info = &tile_data->tile_info;
4884   MACROBLOCK *const x = &td->mb;
4885   MACROBLOCKD *const xd = &x->e_mbd;
4886   const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
4887   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4888   const int mib_size = cm->seq_params.mib_size;
4889   const int mib_size_log2 = cm->seq_params.mib_size_log2;
4890   const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
4891   const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
4892 
4893 #if CONFIG_COLLECT_COMPONENT_TIMING
4894   start_timing(cpi, encode_sb_time);
4895 #endif
4896 
4897   // Initialize the left context for the new SB row
4898   av1_zero_left_context(xd);
4899 
4900   // Reset delta-q/delta-lf state at each tile start (every SB row under row-mt)
4901   if (mi_row == tile_info->mi_row_start || cpi->row_mt) {
4902     if (cm->delta_q_info.delta_q_present_flag)
4903       xd->current_qindex = cm->quant_params.base_qindex;
4904     if (cm->delta_q_info.delta_lf_present_flag) {
4905       av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
4906     }
4907   }
4908   reset_thresh_freq_fact(x);
4909 
4910   // Code each SB in the row
4911   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
4912        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
4913     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4914                                    sb_col_in_tile);
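    // With row-mt and CDF updates enabled, seed this SB's entropy context
    // from the row above: the first SB of a row copies the saved row
    // context, and subsequent SBs average the left context with the
    // top-right one using the AVG_CDF_WEIGHT_* weights.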
4915     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4916         (tile_info->mi_row_start != mi_row)) {
4917       if (tile_info->mi_col_start == mi_col) {
4918         // Restore the frame context saved for the first-column SB
4919         memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
4920       } else {
4921         int wt_left = AVG_CDF_WEIGHT_LEFT;
4922         int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
4923         if (tile_info->mi_col_end > (mi_col + mib_size))
4924           avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile, wt_left,
4925                           wt_tr);
4926         else
4927           avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
4928                           wt_left, wt_tr);
4929       }
4930     }
4931 
4932     set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
4933 
4934     x->color_sensitivity[0] = 0;
4935     x->color_sensitivity[1] = 0;
4936     x->content_state_sb = 0;
4937 
4938     PC_TREE *const pc_root = td->pc_root;
4939     pc_root->index = 0;
4940 
4941     xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
4942     td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
4943     x->source_variance = UINT_MAX;
4944     x->simple_motion_pred_sse = UINT_MAX;
4945 
4946     const struct segmentation *const seg = &cm->seg;
4947     int seg_skip = 0;
4948     if (seg->enabled) {
4949       const uint8_t *const map =
4950           seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
4951       const int segment_id =
4952           map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
4953               : 0;
4954       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4955     }
4956 
4957     if (use_nonrd_mode) {
4958       encode_nonrd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col,
4959                       seg_skip);
4960     } else {
4961       encode_rd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col, seg_skip);
4962     }
4963 
4964     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4965         (tile_info->mi_row_end > (mi_row + mib_size))) {
4966       if (sb_cols_in_tile == 1)
4967         memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
4968       else if (sb_col_in_tile >= 1)
4969         memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
4970                sizeof(*xd->tile_ctx));
4971     }
4972     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4973                                     sb_col_in_tile, sb_cols_in_tile);
4974   }
4975 #if CONFIG_COLLECT_COMPONENT_TIMING
4976   end_timing(cpi, encode_sb_time);
4977 #endif
4978 }
4979 
4980 static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
4981   AV1_COMMON *const cm = &cpi->common;
4982   const int num_planes = av1_num_planes(cm);
4983   MACROBLOCK *const x = &cpi->td.mb;
4984   MACROBLOCKD *const xd = &x->e_mbd;
4985 
4986   // Copy data over into macro block data structures.
4987   av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
4988                        cm->seq_params.sb_size);
4989 
4990   av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
4991                          cm->seq_params.subsampling_y, num_planes);
4992 }
4993 
4994 void av1_alloc_tile_data(AV1_COMP *cpi) {
4995   AV1_COMMON *const cm = &cpi->common;
4996   const int tile_cols = cm->tiles.cols;
4997   const int tile_rows = cm->tiles.rows;
4998 
4999   if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
5000   CHECK_MEM_ERROR(
5001       cm, cpi->tile_data,
5002       aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
5003 
5004   cpi->allocated_tiles = tile_cols * tile_rows;
5005 }
5006 
5007 void av1_init_tile_data(AV1_COMP *cpi) {
5008   AV1_COMMON *const cm = &cpi->common;
5009   const int num_planes = av1_num_planes(cm);
5010   const int tile_cols = cm->tiles.cols;
5011   const int tile_rows = cm->tiles.rows;
5012   int tile_col, tile_row;
5013   TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
5014   TOKENLIST *tplist = cpi->tplist[0][0];
5015   unsigned int tile_tok = 0;
5016   int tplist_count = 0;
5017 
5018   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
5019     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
5020       TileDataEnc *const tile_data =
5021           &cpi->tile_data[tile_row * tile_cols + tile_col];
5022       TileInfo *const tile_info = &tile_data->tile_info;
5023       av1_tile_init(tile_info, cm, tile_row, tile_col);
5024 
5025       cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
5026       pre_tok = cpi->tile_tok[tile_row][tile_col];
5027       tile_tok = allocated_tokens(
5028           *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
5029       cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
5030       tplist = cpi->tplist[tile_row][tile_col];
5031       tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
5032       tile_data->allow_update_cdf = !cm->tiles.large_scale;
5033       tile_data->allow_update_cdf =
5034           tile_data->allow_update_cdf && !cm->features.disable_cdf_update;
5035       tile_data->tctx = *cm->fc;
5036     }
5037   }
5038 }
5039 
5040 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
5041                        int tile_col, int mi_row) {
5042   AV1_COMMON *const cm = &cpi->common;
5043   const int num_planes = av1_num_planes(cm);
5044   const int tile_cols = cm->tiles.cols;
5045   TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
5046   const TileInfo *const tile_info = &this_tile->tile_info;
5047   TOKENEXTRA *tok = NULL;
5048   const int sb_row_in_tile =
5049       (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
5050   const int tile_mb_cols =
5051       (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
5052   const int num_mb_rows_in_sb =
5053       ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
5054 
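  // Token bookkeeping: record the [start, stop) token range produced by this
  // SB row in cpi->tplist so the row can later be packed independently, and
  // assert that the row stayed within its token allocation.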
5055   get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
5056                 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
5057   cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
5058 
5059   encode_sb_row(cpi, td, this_tile, mi_row, &tok);
5060 
5061   cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
5062   cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
5063       (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop -
5064                      cpi->tplist[tile_row][tile_col][sb_row_in_tile].start);
5065 
5066   assert(
5067       (unsigned int)(tok -
5068                      cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <=
5069       get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
5070                       cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes));
5071 
5072   (void)tile_mb_cols;
5073   (void)num_mb_rows_in_sb;
5074 }
5075 
5076 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
5077                      int tile_col) {
5078   AV1_COMMON *const cm = &cpi->common;
5079   TileDataEnc *const this_tile =
5080       &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
5081   const TileInfo *const tile_info = &this_tile->tile_info;
5082 
5083   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
5084 
5085   av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
5086                          tile_info->mi_col_end, tile_row);
5087   av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
5088                          &td->mb.e_mbd);
5089 
5090   if (cpi->oxcf.enable_cfl_intra) cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
5091 
5092   av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator);
5093 
5094   for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
5095        mi_row += cm->seq_params.mib_size) {
5096     av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
5097   }
5098 }
5099 
5100 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
5101   AV1_COMMON *const cm = &cpi->common;
5102   const int tile_cols = cm->tiles.cols;
5103   const int tile_rows = cm->tiles.rows;
5104   int tile_col, tile_row;
5105 
5106   if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
5107     av1_alloc_tile_data(cpi);
5108 
5109   av1_init_tile_data(cpi);
5110 
5111   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
5112     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
5113       TileDataEnc *const this_tile =
5114           &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
5115       cpi->td.intrabc_used = 0;
5116       cpi->td.deltaq_used = 0;
5117       cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
5118       cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
5119       av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
5120       cpi->intrabc_used |= cpi->td.intrabc_used;
5121       cpi->deltaq_used |= cpi->td.deltaq_used;
5122     }
5123   }
5124 }
5125 
5126 #define GLOBAL_TRANS_TYPES_ENC 3  // highest motion model to search
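// Returns the rate cost, in AV1_PROB_COST_SHIFT units, of signaling the
// global motion parameters in gm relative to the reference model ref_gm.
// Each parameter is coded with a signed subexponential code centered on the
// corresponding reference parameter; the cases fall through so that AFFINE
// and ROTZOOM models also pay for their translational components.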
5127 static int gm_get_params_cost(const WarpedMotionParams *gm,
5128                               const WarpedMotionParams *ref_gm, int allow_hp) {
5129   int params_cost = 0;
5130   int trans_bits, trans_prec_diff;
5131   switch (gm->wmtype) {
5132     case AFFINE:
5133     case ROTZOOM:
5134       params_cost += aom_count_signed_primitive_refsubexpfin(
5135           GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5136           (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
5137           (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5138       params_cost += aom_count_signed_primitive_refsubexpfin(
5139           GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5140           (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
5141           (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
5142       if (gm->wmtype >= AFFINE) {
5143         params_cost += aom_count_signed_primitive_refsubexpfin(
5144             GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5145             (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
5146             (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
5147         params_cost += aom_count_signed_primitive_refsubexpfin(
5148             GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5149             (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
5150                 (1 << GM_ALPHA_PREC_BITS),
5151             (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5152       }
5153       AOM_FALLTHROUGH_INTENDED;
5154     case TRANSLATION:
5155       trans_bits = (gm->wmtype == TRANSLATION)
5156                        ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
5157                        : GM_ABS_TRANS_BITS;
5158       trans_prec_diff = (gm->wmtype == TRANSLATION)
5159                             ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
5160                             : GM_TRANS_PREC_DIFF;
5161       params_cost += aom_count_signed_primitive_refsubexpfin(
5162           (1 << trans_bits) + 1, SUBEXPFIN_K,
5163           (ref_gm->wmmat[0] >> trans_prec_diff),
5164           (gm->wmmat[0] >> trans_prec_diff));
5165       params_cost += aom_count_signed_primitive_refsubexpfin(
5166           (1 << trans_bits) + 1, SUBEXPFIN_K,
5167           (ref_gm->wmmat[1] >> trans_prec_diff),
5168           (gm->wmmat[1] >> trans_prec_diff));
5169       AOM_FALLTHROUGH_INTENDED;
5170     case IDENTITY: break;
5171     default: assert(0);
5172   }
5173   return (params_cost << AV1_PROB_COST_SHIFT);
5174 }
5175 
5176 static int do_gm_search_logic(SPEED_FEATURES *const sf, int frame) {
5177   (void)frame;
5178   switch (sf->gm_sf.gm_search_type) {
5179     case GM_FULL_SEARCH: return 1;
5180     case GM_REDUCED_REF_SEARCH_SKIP_L2_L3:
5181       return !(frame == LAST2_FRAME || frame == LAST3_FRAME);
5182     case GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2:
5183       return !(frame == LAST2_FRAME || frame == LAST3_FRAME ||
5184                (frame == ALTREF2_FRAME));
5185     case GM_DISABLE_SEARCH: return 0;
5186     default: assert(0);
5187   }
5188   return 1;
5189 }
5190 
5191 // Set the relative distance of each reference frame w.r.t. the current frame.
5192 static AOM_INLINE void set_rel_frame_dist(AV1_COMP *cpi) {
5193   const AV1_COMMON *const cm = &cpi->common;
5194   const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
5195   MV_REFERENCE_FRAME ref_frame;
5196   int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
5197   cpi->nearest_past_ref = NONE_FRAME;
5198   cpi->nearest_future_ref = NONE_FRAME;
5199   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
5200     cpi->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
5201     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
5202       int dist = av1_encoder_get_relative_dist(
5203           order_hint_info,
5204           cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
5205           cm->current_frame.display_order_hint);
5206       cpi->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
5207       // Get the nearest ref_frame in the past
5208       if (abs(dist) < min_past_dist && dist < 0) {
5209         cpi->nearest_past_ref = ref_frame;
5210         min_past_dist = abs(dist);
5211       }
5212       // Get the nearest ref_frame in the future
5213       if (dist < min_future_dist && dist > 0) {
5214         cpi->nearest_future_ref = ref_frame;
5215         min_future_dist = dist;
5216       }
5217     }
5218   }
5219 }
5220 
5221 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
5222   assert(!frame_is_intra_only(cm));
5223 
5224   int one_sided_refs = 1;
5225   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
5226     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
5227     if (buf == NULL) continue;
5228 
5229     const int ref_display_order_hint = buf->display_order_hint;
5230     if (av1_encoder_get_relative_dist(
5231             &cm->seq_params.order_hint_info, ref_display_order_hint,
5232             (int)cm->current_frame.display_order_hint) > 0) {
5233       one_sided_refs = 0;  // bwd reference
5234       break;
5235     }
5236   }
5237   return one_sided_refs;
5238 }
5239 
5240 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
5241                                              int ref_order_hint[2]) {
5242   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
5243   ref_order_hint[0] = ref_order_hint[1] = 0;
5244   if (!skip_mode_info->skip_mode_allowed) return;
5245 
5246   const RefCntBuffer *const buf_0 =
5247       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
5248   const RefCntBuffer *const buf_1 =
5249       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
5250   assert(buf_0 != NULL && buf_1 != NULL);
5251 
5252   ref_order_hint[0] = buf_0->order_hint;
5253   ref_order_hint[1] = buf_1->order_hint;
5254 }
5255 
5256 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
5257   AV1_COMMON *const cm = &cpi->common;
5258 
5259   av1_setup_skip_mode_allowed(cm);
5260   if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
5261 
5262   // Turn off skip mode if the temporal distances of the reference pair to the
5263   // current frame are different by more than 1 frame.
5264   const int cur_offset = (int)cm->current_frame.order_hint;
5265   int ref_offset[2];
5266   get_skip_mode_ref_offsets(cm, ref_offset);
5267   const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
5268                                             cur_offset, ref_offset[0]);
5269   const int cur_to_ref1 = abs(get_relative_dist(&cm->seq_params.order_hint_info,
5270                                                 cur_offset, ref_offset[1]));
5271   if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
5272 
5273   // High Latency: Turn off skip mode if all refs are fwd.
5274   if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;
5275 
5276   static const int flag_list[REF_FRAMES] = { 0,
5277                                              AOM_LAST_FLAG,
5278                                              AOM_LAST2_FLAG,
5279                                              AOM_LAST3_FLAG,
5280                                              AOM_GOLD_FLAG,
5281                                              AOM_BWD_FLAG,
5282                                              AOM_ALT2_FLAG,
5283                                              AOM_ALT_FLAG };
5284   const int ref_frame[2] = {
5285     cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
5286     cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
5287   };
5288   if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) ||
5289       !(cpi->ref_frame_flags & flag_list[ref_frame[1]]))
5290     return 0;
5291 
5292   return 1;
5293 }
5294 
5295 // Function to decide if we can skip the global motion parameter computation
5296 // for a particular ref frame
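// As a reading of the check below (not a documented guarantee): the search
// is skipped for LAST2/LAST3 when GOLDEN already carries a non-identity
// model and the candidate ref is no newer, in order-hint terms, than GOLDEN.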
5297 static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
5298   if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
5299       cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
5300     return get_relative_dist(
5301                &cm->seq_params.order_hint_info,
5302                cm->cur_frame->ref_order_hints[ref_frame - LAST_FRAME],
5303                cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <= 0;
5304   }
5305   return 0;
5306 }
5307 
5308 static AOM_INLINE void set_default_interp_skip_flags(
5309     const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
5310   const int num_planes = av1_num_planes(cm);
5311   interp_search_flags->default_interp_skip_flags =
5312       (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
5313                         : INTERP_SKIP_LUMA_SKIP_CHROMA;
5314 }
5315 
5316 // TODO(Remya): Can include erroradv_prod_tr[] for threshold calculation
5317 static INLINE int64_t calc_erroradv_threshold(AV1_COMP *cpi,
5318                                               int64_t ref_frame_error) {
5319   if (!cpi->sf.gm_sf.disable_adaptive_warp_error_thresh)
5320     return (int64_t)(
5321         ref_frame_error * erroradv_tr[cpi->sf.gm_sf.gm_erroradv_type] + 0.5);
5322   else
5323     return INT64_MAX;
5324 }
5325 
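// Estimates a global motion model for a single reference frame. Roughly:
// FAST corners are detected once per source frame (cached through
// num_frm_corners), RANSAC fits up to RANSAC_NUM_MOTIONS candidate parameter
// sets per searched model (only ROTZOOM while GLOBAL_TRANS_TYPES_ENC is 3),
// each candidate is converted to integer warp parameters and refined, and
// the candidate with the lowest warp error is kept. A model whose error
// advantage does not justify its bit cost is reverted to IDENTITY.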
5326 static void compute_global_motion_for_ref_frame(
5327     AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES], int frame,
5328     int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5329     MotionModel *params_by_motion, uint8_t *segment_map,
5330     const int segment_map_w, const int segment_map_h,
5331     const WarpedMotionParams *ref_params) {
5332   ThreadData *const td = &cpi->td;
5333   MACROBLOCK *const x = &td->mb;
5334   AV1_COMMON *const cm = &cpi->common;
5335   MACROBLOCKD *const xd = &x->e_mbd;
5336   int i;
5337   // clang-format off
5338   static const double kIdentityParams[MAX_PARAMDIM - 1] = {
5339      0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
5340   };
5341   // clang-format on
5342   WarpedMotionParams tmp_wm_params;
5343   const double *params_this_motion;
5344   int inliers_by_motion[RANSAC_NUM_MOTIONS];
5345   assert(ref_buf[frame] != NULL);
5346   if (*num_frm_corners < 0) {
5347     // compute interest points using FAST features
5348     *num_frm_corners = av1_fast_corner_detect(
5349         frm_buffer, cpi->source->y_width, cpi->source->y_height,
5350         cpi->source->y_stride, frm_corners, MAX_CORNERS);
5351   }
5352   TransformationType model;
5353 
5354   aom_clear_system_state();
5355 
5356   // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
5357   const int do_adaptive_gm_estimation = 0;
5358 
5359   const int ref_frame_dist = get_relative_dist(
5360       &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
5361       cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
5362   const GlobalMotionEstimationType gm_estimation_type =
5363       cm->seq_params.order_hint_info.enable_order_hint &&
5364               abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
5365           ? GLOBAL_MOTION_DISFLOW_BASED
5366           : GLOBAL_MOTION_FEATURE_BASED;
5367   for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
5368     int64_t best_warp_error = INT64_MAX;
5369     // Initially set all params to identity.
5370     for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5371       memcpy(params_by_motion[i].params, kIdentityParams,
5372              (MAX_PARAMDIM - 1) * sizeof(*(params_by_motion[i].params)));
5373       params_by_motion[i].num_inliers = 0;
5374     }
5375 
5376     av1_compute_global_motion(
5377         model, frm_buffer, cpi->source->y_width, cpi->source->y_height,
5378         cpi->source->y_stride, frm_corners, *num_frm_corners, ref_buf[frame],
5379         cpi->common.seq_params.bit_depth, gm_estimation_type, inliers_by_motion,
5380         params_by_motion, RANSAC_NUM_MOTIONS);
5381     int64_t ref_frame_error = 0;
5382     for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5383       if (inliers_by_motion[i] == 0) continue;
5384 
5385       params_this_motion = params_by_motion[i].params;
5386       av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
5387 
5388       if (tmp_wm_params.wmtype != IDENTITY) {
5389         av1_compute_feature_segmentation_map(
5390             segment_map, segment_map_w, segment_map_h,
5391             params_by_motion[i].inliers, params_by_motion[i].num_inliers);
5392 
5393         ref_frame_error = av1_segmented_frame_error(
5394             is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
5395             ref_buf[frame]->y_stride, cpi->source->y_buffer,
5396             cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride,
5397             segment_map, segment_map_w);
5398 
5399         int64_t erroradv_threshold =
5400             calc_erroradv_threshold(cpi, ref_frame_error);
5401 
5402         const int64_t warp_error = av1_refine_integerized_param(
5403             &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd), xd->bd,
5404             ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
5405             ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
5406             cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height,
5407             cpi->source->y_stride, GM_REFINEMENT_COUNT, best_warp_error,
5408             segment_map, segment_map_w, erroradv_threshold);
5409 
5410         if (warp_error < best_warp_error) {
5411           best_warp_error = warp_error;
5412           // Save the wm_params modified by
5413           // av1_refine_integerized_param() rather than the motion index,
5414           // to avoid rerunning the refinement below.
5415           memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
5416                  sizeof(WarpedMotionParams));
5417         }
5418       }
5419     }
5420     if (cm->global_motion[frame].wmtype <= AFFINE)
5421       if (!av1_get_shear_params(&cm->global_motion[frame]))
5422         cm->global_motion[frame] = default_warp_params;
5423 
5424     if (cm->global_motion[frame].wmtype == TRANSLATION) {
5425       cm->global_motion[frame].wmmat[0] =
5426           convert_to_trans_prec(cm->features.allow_high_precision_mv,
5427                                 cm->global_motion[frame].wmmat[0]) *
5428           GM_TRANS_ONLY_DECODE_FACTOR;
5429       cm->global_motion[frame].wmmat[1] =
5430           convert_to_trans_prec(cm->features.allow_high_precision_mv,
5431                                 cm->global_motion[frame].wmmat[1]) *
5432           GM_TRANS_ONLY_DECODE_FACTOR;
5433     }
5434 
5435     if (cm->global_motion[frame].wmtype == IDENTITY) continue;
5436 
5437     if (ref_frame_error == 0) continue;
5438 
5439     // If the best error advantage found doesn't meet the threshold for
5440     // this motion type, revert to IDENTITY.
5441     if (!av1_is_enough_erroradvantage(
5442             (double)best_warp_error / ref_frame_error,
5443             gm_get_params_cost(&cm->global_motion[frame], ref_params,
5444                                cm->features.allow_high_precision_mv),
5445             cpi->sf.gm_sf.gm_erroradv_type)) {
5446       cm->global_motion[frame] = default_warp_params;
5447     }
5448 
5449     if (cm->global_motion[frame].wmtype != IDENTITY) break;
5450   }
5451 
5452   aom_clear_system_state();
5453 }
5454 
5455 typedef struct {
5456   int distance;
5457   MV_REFERENCE_FRAME frame;
5458 } FrameDistPair;
5459 
5460 static INLINE void update_valid_ref_frames_for_gm(
5461     AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES],
5462     FrameDistPair *past_ref_frame, FrameDistPair *future_ref_frame,
5463     int *num_past_ref_frames, int *num_future_ref_frames) {
5464   AV1_COMMON *const cm = &cpi->common;
5465   const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
5466   for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
5467     const MV_REFERENCE_FRAME ref_frame[2] = { frame, NONE_FRAME };
5468     RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
5469     const int ref_disabled =
5470         !(cpi->ref_frame_flags & av1_ref_frame_flag_list[frame]);
5471     ref_buf[frame] = NULL;
5472     cm->global_motion[frame] = default_warp_params;
5473     // Skip global motion estimation for invalid ref frames
5474     if (buf == NULL ||
5475         (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) {
5476       cpi->gm_info.params_cost[frame] = 0;
5477       continue;
5478     } else {
5479       ref_buf[frame] = &buf->buf;
5480     }
5481 
5482     if (ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
5483         ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
5484         do_gm_search_logic(&cpi->sf, frame) &&
5485         !prune_ref_by_selective_ref_frame(
5486             cpi, NULL, ref_frame, cm->cur_frame->ref_display_order_hint) &&
5487         !(cpi->sf.gm_sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
5488       assert(ref_buf[frame] != NULL);
5489       int relative_frame_dist = av1_encoder_get_relative_dist(
5490           order_hint_info, buf->display_order_hint,
5491           cm->cur_frame->display_order_hint);
5492       // Populate past and future ref frames
5493       if (relative_frame_dist <= 0) {
5494         past_ref_frame[*num_past_ref_frames].distance =
5495             abs(relative_frame_dist);
5496         past_ref_frame[*num_past_ref_frames].frame = frame;
5497         (*num_past_ref_frames)++;
5498       } else {
5499         future_ref_frame[*num_future_ref_frames].distance =
5500             abs(relative_frame_dist);
5501         future_ref_frame[*num_future_ref_frames].frame = frame;
5502         (*num_future_ref_frames)++;
5503       }
5504     }
5505   }
5506 }
5507 
5508 static INLINE void compute_gm_for_valid_ref_frames(
5509     AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES], int frame,
5510     int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5511     MotionModel *params_by_motion, uint8_t *segment_map,
5512     const int segment_map_w, const int segment_map_h) {
5513   AV1_COMMON *const cm = &cpi->common;
5514   GlobalMotionInfo *const gm_info = &cpi->gm_info;
5515   const WarpedMotionParams *ref_params =
5516       cm->prev_frame ? &cm->prev_frame->global_motion[frame]
5517                      : &default_warp_params;
5518 
5519   compute_global_motion_for_ref_frame(
5520       cpi, ref_buf, frame, num_frm_corners, frm_corners, frm_buffer,
5521       params_by_motion, segment_map, segment_map_w, segment_map_h, ref_params);
5522 
5523   gm_info->params_cost[frame] =
5524       gm_get_params_cost(&cm->global_motion[frame], ref_params,
5525                          cm->features.allow_high_precision_mv) +
5526       gm_info->type_cost[cm->global_motion[frame].wmtype] -
5527       gm_info->type_cost[IDENTITY];
5528 }
5529 
5530 static int compare_distance(const void *a, const void *b) {
5531   const int diff =
5532       ((FrameDistPair *)a)->distance - ((FrameDistPair *)b)->distance;
5533   if (diff > 0)
5534     return 1;
5535   else if (diff < 0)
5536     return -1;
5537   return 0;
5538 }
5539 
5540 static INLINE void compute_global_motion_for_references(
5541     AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES],
5542     FrameDistPair reference_frame[REF_FRAMES - 1], int num_ref_frames,
5543     int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5544     MotionModel *params_by_motion, uint8_t *segment_map,
5545     const int segment_map_w, const int segment_map_h) {
5546   AV1_COMMON *const cm = &cpi->common;
5547   // Compute global motion w.r.t. reference frames starting from the nearest ref
5548   // frame in a given direction
5549   for (int frame = 0; frame < num_ref_frames; frame++) {
5550     int ref_frame = reference_frame[frame].frame;
5551     compute_gm_for_valid_ref_frames(cpi, ref_buf, ref_frame, num_frm_corners,
5552                                     frm_corners, frm_buffer, params_by_motion,
5553                                     segment_map, segment_map_w, segment_map_h);
5554     // If the global motion w.r.t. the current ref frame is
5555     // INVALID/TRANSLATION/IDENTITY, skip evaluating global motion w.r.t.
5556     // the remaining ref frames in that direction. This early exit is
5557     // disabled when the ref frame's distance from the current frame is
5558     // zero, e.g. source_alt_ref_frame w.r.t. ARF frames.
5559     if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search &&
5560         reference_frame[frame].distance != 0 &&
5561         cm->global_motion[ref_frame].wmtype != ROTZOOM)
5562       break;
5563   }
5564 }
5565 
5566 static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
5567   if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
5568       cpi->sf.inter_sf.selective_ref_frame >= 2) {
5569     AV1_COMMON *const cm = &cpi->common;
5570     const OrderHintInfo *const order_hint_info =
5571         &cm->seq_params.order_hint_info;
5572     const int cur_frame_display_order_hint =
5573         cm->current_frame.display_order_hint;
5574     unsigned int *ref_display_order_hint =
5575         cm->cur_frame->ref_display_order_hint;
5576     const int arf2_dist = av1_encoder_get_relative_dist(
5577         order_hint_info, ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
5578         cur_frame_display_order_hint);
5579     const int bwd_dist = av1_encoder_get_relative_dist(
5580         order_hint_info, ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
5581         cur_frame_display_order_hint);
5582 
5583     for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
5584       MV_REFERENCE_FRAME rf[2];
5585       av1_set_ref_frame(rf, ref_idx);
5586       if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
5587           !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
5588         continue;
5589       }
5590 
5591       if (!cpi->all_one_sided_refs) {
5592         int ref_dist[2];
5593         for (int i = 0; i < 2; ++i) {
5594           ref_dist[i] = av1_encoder_get_relative_dist(
5595               order_hint_info, ref_display_order_hint[rf[i] - LAST_FRAME],
5596               cur_frame_display_order_hint);
5597         }
5598 
5599         // One-sided compound is used only when all reference frames are
5600         // one-sided.
5601         if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
5602           cpi->prune_ref_frame_mask |= 1 << ref_idx;
5603         }
5604       }
5605 
5606       if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
5607           (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
5608           (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
5609         // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
5610         if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
5611           // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
5612           // reference to the current frame than ALTREF2_FRAME
5613           cpi->prune_ref_frame_mask |= 1 << ref_idx;
5614         }
5615       }
5616     }
5617   }
5618 }
5619 
5620 #define CHECK_PRECOMPUTED_REF_FRAME_MAP 0
5621 
5622 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
5623   ThreadData *const td = &cpi->td;
5624   MACROBLOCK *const x = &td->mb;
5625   AV1_COMMON *const cm = &cpi->common;
5626   CommonModeInfoParams *const mi_params = &cm->mi_params;
5627   FeatureFlags *const features = &cm->features;
5628   MACROBLOCKD *const xd = &x->e_mbd;
5629   RD_COUNTS *const rdc = &cpi->td.rd_counts;
5630   GlobalMotionInfo *const gm_info = &cpi->gm_info;
5631   FrameProbInfo *const frame_probs = &cpi->frame_probs;
5632   IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
5633   int i;
5634 
5635   if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
5636     mi_params->setup_mi(mi_params);
5637   }
5638 
5639   set_mi_offsets(mi_params, xd, 0, 0);
5640 
5641 #if CONFIG_AV1_HIGHBITDEPTH
5642   x->fwd_txfm4x4 = aom_fdct4x4;
5643 #else
5644   x->fwd_txfm4x4 = aom_fdct4x4_lp;
5645 #endif
5646 
5647   av1_zero(*td->counts);
5648   av1_zero(rdc->comp_pred_diff);
5649   av1_zero(rdc->tx_type_used);
5650   av1_zero(rdc->obmc_used);
5651   av1_zero(rdc->warped_used);
5652 
5653   // Reset the intrabc usage flag.
5654   cpi->intrabc_used = 0;
5655   // IntraBC must be disabled when superres is selected.
5656   if (av1_superres_scaled(cm)) {
5657     features->allow_intrabc = 0;
5658   }
5659 
5660   features->allow_intrabc &= (cpi->oxcf.enable_intrabc);
5661 
5662   if (features->allow_warped_motion &&
5663       cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
5664     const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5665     if (frame_probs->warped_probs[update_type] <
5666         cpi->sf.inter_sf.prune_warped_prob_thresh)
5667       features->allow_warped_motion = 0;
5668   }
5669 
5670   int hash_table_created = 0;
5671   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
5672       !cpi->sf.rt_sf.use_nonrd_pick_mode) {
5673     // TODO(any): move this outside of the recoding loop to avoid recalculating
5674     // the hash table.
5675     // Add the source blocks to the intraBC hash table.
5676     const int pic_width = cpi->source->y_crop_width;
5677     const int pic_height = cpi->source->y_crop_height;
5678     uint32_t *block_hash_values[2][2];
5679     int8_t *is_block_same[2][3];
5680     int k, j;
5681 
5682     for (k = 0; k < 2; k++) {
5683       for (j = 0; j < 2; j++) {
5684         CHECK_MEM_ERROR(cm, block_hash_values[k][j],
5685                         aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
5686       }
5687 
5688       for (j = 0; j < 3; j++) {
5689         CHECK_MEM_ERROR(cm, is_block_same[k][j],
5690                         aom_malloc(sizeof(int8_t) * pic_width * pic_height));
5691       }
5692     }
5693 
5694     av1_hash_table_init(intrabc_hash_info);
5695     av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table);
5696     hash_table_created = 1;
5697     av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
5698                                       block_hash_values[0], is_block_same[0]);
5699     // Hash data generated for screen contents is used for intraBC ME
5700     const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
5701     const int max_sb_size =
5702         (1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2));
5703     int src_idx = 0;
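    // Hash values are built bottom-up: each pass doubles the block size by
    // combining the hashes from the previous level, ping-ponging between the
    // two block_hash_values/is_block_same buffers via src_idx/dst_idx; sizes
    // from min_alloc_size up to the superblock size go into the hash map.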
5704     for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
5705       const int dst_idx = !src_idx;
5706       av1_generate_block_hash_value(
5707           intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
5708           block_hash_values[dst_idx], is_block_same[src_idx],
5709           is_block_same[dst_idx]);
5710       if (size >= min_alloc_size) {
5711         av1_add_to_hash_map_by_row_with_precal_data(
5712             &intrabc_hash_info->intrabc_hash_table, block_hash_values[dst_idx],
5713             is_block_same[dst_idx][2], pic_width, pic_height, size);
5714       }
5715     }
5716 
5717     for (k = 0; k < 2; k++) {
5718       for (j = 0; j < 2; j++) {
5719         aom_free(block_hash_values[k][j]);
5720       }
5721 
5722       for (j = 0; j < 3; j++) {
5723         aom_free(is_block_same[k][j]);
5724       }
5725     }
5726   }
5727 
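  // Per-segment lossless setup: a segment is lossless only when its
  // effective qindex is 0 and all DC/AC delta-q offsets are 0; trellis
  // coefficient optimization is disabled for such segments.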
5728   const CommonQuantParams *quant_params = &cm->quant_params;
5729   for (i = 0; i < MAX_SEGMENTS; ++i) {
5730     const int qindex =
5731         cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
5732                         : quant_params->base_qindex;
5733     xd->lossless[i] =
5734         qindex == 0 && quant_params->y_dc_delta_q == 0 &&
5735         quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
5736         quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
5737     if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
5738     xd->qindex[i] = qindex;
5739     if (xd->lossless[i]) {
5740       cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
5741     } else {
5742       cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
5743     }
5744   }
5745   features->coded_lossless = is_coded_lossless(cm, xd);
5746   features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
5747 
5748   // Fix delta q resolution for the moment
5749   cm->delta_q_info.delta_q_res = 0;
5750   if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE)
5751     cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
5752   else if (cpi->oxcf.deltaq_mode == DELTA_Q_PERCEPTUAL)
5753     cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
5754   // Set delta_q_present_flag before it is used for the first time
5755   cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
5756   cm->delta_q_info.delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
5757 
5758   // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q is
5759   // used for ineligible frames, which effectively turns off row_mt usage.
5760   // Note that currently only altref frames are eligible for objective
5761   // delta_q and TPL.
5762   if (cm->delta_q_info.delta_q_present_flag) {
5763     if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE &&
5764         !is_frame_tpl_eligible(cpi))
5765       cm->delta_q_info.delta_q_present_flag = 0;
5766   }
5767 
5768   // Reset delta_q_used flag
5769   cpi->deltaq_used = 0;
5770 
5771   cm->delta_q_info.delta_lf_present_flag =
5772       cm->delta_q_info.delta_q_present_flag && cpi->oxcf.deltalf_mode;
5773   cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
5774 
5775   // update delta_q_present_flag and delta_lf_present_flag based on
5776   // base_qindex
5777   cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
5778   cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
5779 
5780   av1_frame_init_quantizer(cpi);
5781   av1_initialize_rd_consts(cpi);
5782   av1_initialize_me_consts(cpi, x, quant_params->base_qindex);
5783 
5784   init_encode_frame_mb_context(cpi);
5785   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
5786   if (cm->prev_frame && cm->prev_frame->seg.enabled)
5787     cm->last_frame_seg_map = cm->prev_frame->seg_map;
5788   else
5789     cm->last_frame_seg_map = NULL;
5790   if (features->allow_intrabc || features->coded_lossless) {
5791     av1_set_default_ref_deltas(cm->lf.ref_deltas);
5792     av1_set_default_mode_deltas(cm->lf.mode_deltas);
5793   } else if (cm->prev_frame) {
5794     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
5795     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
5796   }
5797   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
5798   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
5799 
5800   cpi->all_one_sided_refs =
5801       frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
5802 
5803   cpi->prune_ref_frame_mask = 0;
5804   // Figure out which ref frames can be skipped at frame level.
5805   setup_prune_ref_frame_mask(cpi);
5806 
5807   x->txb_split_count = 0;
5808 #if CONFIG_SPEED_STATS
5809   x->tx_search_count = 0;
5810 #endif  // CONFIG_SPEED_STATS
5811 
5812 #if CONFIG_COLLECT_COMPONENT_TIMING
5813   start_timing(cpi, av1_compute_global_motion_time);
5814 #endif
5815   av1_zero(rdc->global_motion_used);
5816   av1_zero(gm_info->params_cost);
5817   if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
5818       cpi->oxcf.enable_global_motion && !gm_info->search_done) {
5819     YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
5820     MotionModel params_by_motion[RANSAC_NUM_MOTIONS];
5821     for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
5822       memset(&params_by_motion[m], 0, sizeof(params_by_motion[m]));
5823       params_by_motion[m].inliers =
5824           aom_malloc(sizeof(*(params_by_motion[m].inliers)) * 2 * MAX_CORNERS);
5825     }
5826 
5827     int num_frm_corners = -1;
5828     int frm_corners[2 * MAX_CORNERS];
5829     unsigned char *frm_buffer = cpi->source->y_buffer;
5830     if (cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) {
5831       // The frame buffer is 16-bit, so we need to convert to 8 bits for the
5832       // following code. We cache the result until the frame is released.
5833       frm_buffer =
5834           av1_downconvert_frame(cpi->source, cpi->common.seq_params.bit_depth);
5835     }
5836     const int segment_map_w =
5837         (cpi->source->y_width + WARP_ERROR_BLOCK) >> WARP_ERROR_BLOCK_LOG;
5838     const int segment_map_h =
5839         (cpi->source->y_height + WARP_ERROR_BLOCK) >> WARP_ERROR_BLOCK_LOG;
5840 
5841     uint8_t *segment_map =
5842         aom_malloc(sizeof(*segment_map) * segment_map_w * segment_map_h);
5843     memset(segment_map, 0,
5844            sizeof(*segment_map) * segment_map_w * segment_map_h);
5845 
5846     FrameDistPair future_ref_frame[REF_FRAMES - 1] = {
5847       { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5848       { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5849       { -1, NONE_FRAME }
5850     };
5851     FrameDistPair past_ref_frame[REF_FRAMES - 1] = {
5852       { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5853       { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5854       { -1, NONE_FRAME }
5855     };
5856     int num_past_ref_frames = 0;
5857     int num_future_ref_frames = 0;
5858     // Populate ref_buf for valid ref frames in global motion
5859     update_valid_ref_frames_for_gm(cpi, ref_buf, past_ref_frame,
5860                                    future_ref_frame, &num_past_ref_frames,
5861                                    &num_future_ref_frames);
5862 
5863     // Sort the ref frames in the ascending order of their distance from the
5864     // current frame
5865     qsort(past_ref_frame, num_past_ref_frames, sizeof(past_ref_frame[0]),
5866           compare_distance);
5867     qsort(future_ref_frame, num_future_ref_frames, sizeof(future_ref_frame[0]),
5868           compare_distance);
5869 
5870     // Compute global motion w.r.t. past reference frames
5871     if (num_past_ref_frames > 0)
5872       compute_global_motion_for_references(
5873           cpi, ref_buf, past_ref_frame, num_past_ref_frames, &num_frm_corners,
5874           frm_corners, frm_buffer, params_by_motion, segment_map, segment_map_w,
5875           segment_map_h);
5876 
5877     // Compute global motion w.r.t. future reference frames
5878     if (num_future_ref_frames > 0)
5879       compute_global_motion_for_references(
5880           cpi, ref_buf, future_ref_frame, num_future_ref_frames,
5881           &num_frm_corners, frm_corners, frm_buffer, params_by_motion,
5882           segment_map, segment_map_w, segment_map_h);
5883 
5884     aom_free(segment_map);
5885 
5886     gm_info->search_done = 1;
5887     for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
5888       aom_free(params_by_motion[m].inliers);
5889     }
5890   }
5891   memcpy(cm->cur_frame->global_motion, cm->global_motion,
5892          REF_FRAMES * sizeof(WarpedMotionParams));
5893 #if CONFIG_COLLECT_COMPONENT_TIMING
5894   end_timing(cpi, av1_compute_global_motion_time);
5895 #endif
5896 
5897 #if CONFIG_COLLECT_COMPONENT_TIMING
5898   start_timing(cpi, av1_setup_motion_field_time);
5899 #endif
5900   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
5901 #if CONFIG_COLLECT_COMPONENT_TIMING
5902   end_timing(cpi, av1_setup_motion_field_time);
5903 #endif
5904 
5905   cm->current_frame.skip_mode_info.skip_mode_flag =
5906       check_skip_mode_enabled(cpi);
5907 
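  // Threading dispatch: use row-based multithreading when it is enabled and
  // more than one thread is available; otherwise use tile-based
  // multithreading when several tiles can run in parallel; otherwise encode
  // the tiles serially.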
5908   cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy;
5909   cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy;
5910   cpi->row_mt = 0;
5911 
5912   if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
5913     cpi->row_mt = 1;
5914     cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
5915     cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
5916     av1_encode_tiles_row_mt(cpi);
5917   } else {
5918     if (AOMMIN(cpi->oxcf.max_threads, cm->tiles.cols * cm->tiles.rows) > 1)
5919       av1_encode_tiles_mt(cpi);
5920     else
5921       encode_tiles(cpi);
5922   }
5923 
5924   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
5925   if (features->allow_intrabc && !cpi->intrabc_used) {
5926     features->allow_intrabc = 0;
5927   }
5928   if (features->allow_intrabc) {
5929     cm->delta_q_info.delta_lf_present_flag = 0;
5930   }
5931 
5932   if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
5933     cm->delta_q_info.delta_q_present_flag = 0;
5934   }
5935 
5936   // Set the transform size appropriately before bitstream creation
5937   const MODE_EVAL_TYPE eval_type =
5938       cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
5939           ? WINNER_MODE_EVAL
5940           : DEFAULT_EVAL;
5941   const TX_SIZE_SEARCH_METHOD tx_search_type =
5942       cpi->winner_mode_params.tx_size_search_methods[eval_type];
5943   assert(cpi->oxcf.enable_tx64 || tx_search_type != USE_LARGESTALL);
5944   features->tx_mode = select_tx_mode(cm, tx_search_type);
5945 
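  // The frame-level probability updates below use a running average: the
  // frequency observed in this frame is scaled to the table's budget (1024
  // here) and blended 50/50 with the stored value, with the rounding
  // remainder folded into entry 0 so each row still sums to the budget.
  // A worked example (illustrative numbers only): if 60 of 100 coded blocks
  // of a tx size used type j, new_prob = 1024 * 60 / 100 = 614, and the
  // stored probability becomes (prev + 614) >> 1 before remainder
  // correction.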
5946   if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
5947     const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5948 
5949     for (i = 0; i < TX_SIZES_ALL; i++) {
5950       int sum = 0;
5951       int j;
5952       int left = 1024;
5953 
5954       for (j = 0; j < TX_TYPES; j++)
5955         sum += cpi->td.rd_counts.tx_type_used[i][j];
5956 
5957       for (j = TX_TYPES - 1; j >= 0; j--) {
5958         const int new_prob =
5959             sum ? 1024 * cpi->td.rd_counts.tx_type_used[i][j] / sum
5960                 : (j ? 0 : 1024);
5961         int prob =
5962             (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
5963         left -= prob;
5964         if (j == 0) prob += left;
5965         frame_probs->tx_type_probs[update_type][i][j] = prob;
5966       }
5967     }
5968   }
5969 
  if (!cpi->sf.inter_sf.disable_obmc &&
      cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);

    for (i = 0; i < BLOCK_SIZES_ALL; i++) {
      int sum = 0;
      for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];

      const int new_prob =
          sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
      frame_probs->obmc_probs[update_type][i] =
          (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
    }
  }

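  // The same moving-average update for warped motion, except a single
  // frame-level probability is kept instead of one per block size.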
  if (features->allow_warped_motion &&
      cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
    int sum = 0;
    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
    frame_probs->warped_probs[update_type] =
        (frame_probs->warped_probs[update_type] + new_prob) >> 1;
  }

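  // Adaptive interpolation-filter statistics: per context, normalize the
  // switchable-filter counts to a distribution summing to 1536 and average
  // with the running probabilities, again folding the rounding residue into
  // index 0 so the total stays at 1536.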
  if (cm->current_frame.frame_type != KEY_FRAME &&
      cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
      features->interp_filter == SWITCHABLE) {
    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      int sum = 0;
      int j;
      int left = 1536;

      for (j = 0; j < SWITCHABLE_FILTERS; j++) {
        sum += cpi->td.counts->switchable_interp[i][j];
      }

      for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
        const int new_prob =
            sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
                : (j ? 0 : 1536);
        int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
                    new_prob) >>
                   1;
        left -= prob;
        if (j == 0) prob += left;
        frame_probs->switchable_interp_probs[update_type][i][j] = prob;
      }
    }
  }

  if ((!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
       !cpi->sf.rt_sf.use_nonrd_pick_mode) ||
      hash_table_created) {
    av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
  }
}

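// Top-level per-frame encode entry point: sets up references and segment
// state, then finalizes frame-level decisions (reference mode, interpolation
// filter, skip mode, tx_mode) around the call to encode_frame_internal().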
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  FeatureFlags *const features = &cm->features;
  const int num_planes = av1_num_planes(cm);
  // Indicates whether or not to use a default reduced set for ext-tx
  // rather than the potential full set of 16 transforms
  features->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;

  // Make sure segment_id is no larger than last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_params.mi_rows;
    const int mi_cols = cm->mi_params.mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->enc_seg.map;
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi, &cpi->ref_frame_flags);
  set_rel_frame_dist(cpi);
  av1_setup_frame_sign_bias(cm);

#if CHECK_PRECOMPUTED_REF_FRAME_MAP
  GF_GROUP *gf_group = &cpi->gf_group;
  // TODO(yuec): The check is disabled on OVERLAY frames for now, because info
  // in cpi->gf_group has been refreshed for the next GOP when the check is
  // performed for OVERLAY frames. Since we do not yet support inter-GOP ref
  // frame map computation, the precomputed ref map for an OVERLAY frame is all
  // -1 at this point (although it is meaningful before gf_group is refreshed).
  if (!frame_is_intra_only(cm) && gf_group->index != 0) {
    const RefCntBuffer *const golden_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);

    if (golden_buf) {
      const int golden_order_hint = golden_buf->order_hint;

      for (int ref = LAST_FRAME; ref < EXTREF_FRAME; ++ref) {
        const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
        const int ref_disp_idx_precomputed =
            gf_group->ref_frame_disp_idx[gf_group->index][ref - LAST_FRAME];

        (void)ref_disp_idx_precomputed;

        if (buf != NULL) {
          const int ref_disp_idx =
              get_relative_dist(&cm->seq_params.order_hint_info,
                                buf->order_hint, golden_order_hint);

          if (ref_disp_idx >= 0)
            assert(ref_disp_idx == ref_disp_idx_precomputed);
          else
            assert(ref_disp_idx_precomputed == -1);
        } else {
          assert(ref_disp_idx_precomputed == -1);
        }
      }
    }
  }
#endif

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(num_planes);
#else
  (void)num_planes;
#endif

  if (cpi->sf.hl_sf.frame_parameter_update) {
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    if (frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    features->interp_filter = SWITCHABLE;
    if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;

    features->switchable_motion_mode = 1;

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // Use a flag that includes 4x4 blocks
      if (rdc->compound_ref_used_flag == 0) {
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check the skip mode status, as the reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->tiles.large_scale) {
      if (features->tx_mode == TX_MODE_SELECT &&
          cpi->td.mb.txb_split_count == 0)
        features->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    encode_frame_internal(cpi);
  }
}

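// Recursively walks an inter block's transform partition tree, updating the
// txfm_partition CDF/counts at each node according to whether the node is
// split further, and recording the final transform sizes in the above/left
// transform contexts.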
static AOM_INLINE void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
                                         FRAME_COUNTS *counts, TX_SIZE tx_size,
                                         int depth, int blk_row, int blk_col,
                                         uint8_t allow_update_cdf) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
                                   xd->left_txfm_context + blk_row,
                                   mbmi->sb_type, tx_size);
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
  assert(tx_size > TX_4X4);

  if (depth == MAX_VARTX_DEPTH) {
    // Don't add to counts in this case
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
    return;
  }

  if (tx_size == plane_tx_size) {
#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][0];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
  } else {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];

#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][1];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
    ++x->txb_split_count;

    if (sub_txs == TX_4X4) {
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
      mbmi->tx_size = TX_4X4;
      txfm_partition_update(xd->above_txfm_context + blk_col,
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
      return;
    }

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        int offsetr = row;
        int offsetc = col;

        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
                          blk_col + offsetc, allow_update_cdf);
      }
    }
  }
}

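// Applies update_txfm_count() to each max-transform-size unit that covers
// the given plane block.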
static AOM_INLINE void tx_partition_count_update(const AV1_COMMON *const cm,
                                                 MACROBLOCK *x,
                                                 BLOCK_SIZE plane_bsize,
                                                 FRAME_COUNTS *td_counts,
                                                 uint8_t allow_update_cdf) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int mi_width = mi_size_wide[plane_bsize];
  const int mi_height = mi_size_high[plane_bsize];
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int bh = tx_size_high_unit[max_tx_size];
  const int bw = tx_size_wide_unit[max_tx_size];

  xd->above_txfm_context =
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);

  for (int idy = 0; idy < mi_height; idy += bh) {
    for (int idx = 0; idx < mi_width; idx += bw) {
      update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
                        allow_update_cdf);
    }
  }
}

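// Dry-run counterpart of update_txfm_count(): propagates the chosen
// transform sizes into mbmi and the above/left transform contexts without
// touching counts or CDFs.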
static AOM_INLINE void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
                                        int blk_row, int blk_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  if (tx_size == plane_tx_size) {
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
  } else {
    if (tx_size == TX_8X8) {
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
      mbmi->tx_size = TX_4X4;
      txfm_partition_update(xd->above_txfm_context + blk_col,
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
      return;
    }
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        set_txfm_context(xd, sub_txs, offsetr, offsetc);
      }
    }
  }
}

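// Applies set_txfm_context() to each max-transform-size unit that covers
// the given plane block.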
static AOM_INLINE void tx_partition_set_contexts(const AV1_COMMON *const cm,
                                                 MACROBLOCKD *xd,
                                                 BLOCK_SIZE plane_bsize) {
  const int mi_width = mi_size_wide[plane_bsize];
  const int mi_height = mi_size_high[plane_bsize];
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int bh = tx_size_high_unit[max_tx_size];
  const int bw = tx_size_wide_unit[max_tx_size];

  xd->above_txfm_context =
      cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);

  for (int idy = 0; idy < mi_height; idy += bh) {
    for (int idx = 0; idx < mi_width; idx += bw) {
      set_txfm_context(xd, max_tx_size, idy, idx);
    }
  }
}

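// Final encode pass for a coded block: builds the intra or inter prediction,
// encodes and tokenizes the residue, and updates transform-size statistics
// and contexts used when writing the bitstream.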
static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
                                         TileDataEnc *tile_data, ThreadData *td,
                                         TOKENEXTRA **t, RUN_TYPE dry_run,
                                         BLOCK_SIZE bsize, int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO **mi_4x4 = xd->mi;
  MB_MODE_INFO *mbmi = mi_4x4[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
  const int mis = cm->mi_params.mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];
  const int is_inter = is_inter_block(mbmi);

  // Initialize tx_mode and tx_size_search_method
  set_tx_size_search_method(
      cm, &cpi->winner_mode_params, x,
      cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  if (!is_inter) {
    xd->cfl.store_y = store_cfl_required(cm, xd);
    mbmi->skip = 1;
    for (int plane = 0; plane < num_planes; ++plane) {
      av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
                                   cpi->optimize_seg_arr[mbmi->segment_id]);
    }

    // If there is at least one lossless segment, force skip to 0 for intra
    // blocks so that the segment_id is not changed in write_segment_id().
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
        cpi->enc_seg.has_lossless_segment)
      mbmi->skip = 0;

    xd->cfl.store_y = 0;
    if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
          if (!dry_run) {
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
                                   PALETTE_MAP, tile_data->allow_update_cdf,
                                   td->counts);
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
            // Accumulate into the cost, not the pointer itself.
            *rate +=
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
          }
        }
      }
    }

    av1_update_txb_context(cpi, td, dry_run, bsize,
                           tile_data->allow_update_cdf);
  } else {
    int ref;
    const int is_compound = has_second_ref(mbmi);

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      const YV12_BUFFER_CONFIG *cfg =
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           xd->block_ref_scale_factors[ref], num_planes);
    }
    int start_plane = (cpi->sf.rt_sf.reuse_inter_pred_nonrd) ? 1 : 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
                                  start_plane, av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      assert(cpi->oxcf.enable_obmc == 1);
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

#if CONFIG_MISMATCH_DEBUG
    if (dry_run == OUTPUT_ENABLED) {
      for (int plane = 0; plane < num_planes; ++plane) {
        const struct macroblockd_plane *pd = &xd->plane[plane];
        int pixel_c, pixel_r;
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
                        pd->subsampling_x, pd->subsampling_y);
        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
                                 pd->subsampling_y))
          continue;
        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
                                  cm->current_frame.order_hint, plane, pixel_c,
                                  pixel_r, pd->width, pd->height,
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
      }
    }
#else
    (void)num_planes;
#endif

    av1_encode_sb(cpi, x, bsize, dry_run);
    av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate,
                          tile_data->allow_update_cdf);
  }

  if (!dry_run) {
    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
    if (x->tx_mode_search_type == TX_MODE_SELECT &&
        !xd->lossless[mbmi->segment_id] && mbmi->sb_type > BLOCK_4X4 &&
        !(is_inter && (mbmi->skip || seg_skip))) {
      if (is_inter) {
        tx_partition_count_update(cm, x, bsize, td->counts,
                                  tile_data->allow_update_cdf);
      } else {
        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
          ++x->txb_split_count;
        if (block_signals_txsize(bsize)) {
          const int tx_size_ctx = get_tx_size_context(xd);
          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
          const int max_depths = bsize_to_max_depth(bsize);

          if (tile_data->allow_update_cdf)
            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
                       depth, max_depths + 1);
#if CONFIG_ENTROPY_STATS
          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
        }
      }
      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
    } else {
      int i, j;
      TX_SIZE intra_tx_size;
      // The new intra coding scheme requires no change of transform size
      if (is_inter) {
        if (xd->lossless[mbmi->segment_id]) {
          intra_tx_size = TX_4X4;
        } else {
          intra_tx_size = tx_size_from_tx_mode(bsize, x->tx_mode_search_type);
        }
      } else {
        intra_tx_size = mbmi->tx_size;
      }

      for (j = 0; j < mi_height; j++)
        for (i = 0; i < mi_width; i++)
          if (mi_col + i < cm->mi_params.mi_cols &&
              mi_row + j < cm->mi_params.mi_rows)
            mi_4x4[mis * j + i]->tx_size = intra_tx_size;

      if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
    }
  }

  if (x->tx_mode_search_type == TX_MODE_SELECT &&
      block_signals_txsize(mbmi->sb_type) && is_inter &&
      !(mbmi->skip || seg_skip) && !xd->lossless[mbmi->segment_id]) {
    if (dry_run) tx_partition_set_contexts(cm, xd, bsize);
  } else {
    TX_SIZE tx_size = mbmi->tx_size;
    // The new intra coding scheme requires no change of transform size
    if (is_inter) {
      if (xd->lossless[mbmi->segment_id]) {
        tx_size = TX_4X4;
      } else {
        tx_size = tx_size_from_tx_mode(bsize, x->tx_mode_search_type);
      }
    } else {
      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
    }
    mbmi->tx_size = tx_size;
    set_txfm_ctxs(tx_size, xd->width, xd->height,
                  (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
  }

  if (is_inter_block(mbmi) && !xd->is_chroma_ref && is_cfl_allowed(xd)) {
    cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
  }
}