/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/corner_detect.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_model_weights.h"
#endif
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/var_based_part.h"

#if CONFIG_TUNE_VMAF
#include "av1/encoder/tune_vmaf.h"
#endif

static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
                                         TileDataEnc *tile_data, ThreadData *td,
                                         TOKENEXTRA **t, RUN_TYPE dry_run,
                                         BLOCK_SIZE bsize, int *rate);

// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};

typedef struct {
  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
  TXFM_CONTEXT *p_ta;
  TXFM_CONTEXT *p_tl;
  TXFM_CONTEXT ta[MAX_MIB_SIZE];
  TXFM_CONTEXT tl[MAX_MIB_SIZE];
} RD_SEARCH_MACROBLOCK_CONTEXT;

enum { PICK_MODE_RD = 0, PICK_MODE_NONRD };

enum {
  SB_SINGLE_PASS,  // Single pass encoding: all ctxs get updated normally
  SB_DRY_PASS,     // First pass of multi-pass: does not update the ctxs
  SB_WET_PASS      // Second pass of multi-pass: finalize and update the ctx
} UENUM1BYTE(SB_MULTI_PASS_MODE);

// This struct is used to store the statistics used by sb-level multi-pass
// encoding. Currently, it is only used to make a copy of the state before we
// perform the first pass.
typedef struct SB_FIRST_PASS_STATS {
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
  RD_COUNTS rd_count;

  int split_count;
  FRAME_COUNTS fc;
  InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
  int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES];
  int current_qindex;

#if CONFIG_INTERNAL_STATS
  unsigned int mode_chosen_counts[MAX_MODES];
#endif  // CONFIG_INTERNAL_STATS
} SB_FIRST_PASS_STATS;

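// Returns the average per-pixel variance of the source block, measured
// against the flat 128-valued reference above (AV1_VAR_OFFS). The block
// variance from the fn_ptr is normalized by the pixel count: e.g. for
// BLOCK_16X16, num_pels_log2_lookup[bs] is 8, so the result is the block
// variance divided by 256, rounded.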
unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

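// High bitdepth counterpart of the above: (bd - 8) >> 1 maps bit depths
// 8/10/12 to table indices 0/1/2, selecting the flat reference scaled to
// the matching pixel range (128, 128 * 4, or 128 * 16).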
unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  assert(bd == 8 || bd == 10 || bd == 12);
  const int off_index = (bd - 8) >> 1;
  const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
                                       AV1_HIGH_VAR_OFFS_10,
                                       AV1_HIGH_VAR_OFFS_12 };
  var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                         CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last =
      get_ref_frame_yv12_buf(&cpi->common, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

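// Maps the per-pixel difference variance against LAST_FRAME to a fixed
// partition size: low-variance (static) 64x64 areas keep large blocks,
// while high-variance areas are split down to 8x8. The thresholds
// 8/128/2048 appear to be empirically tuned.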
static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

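// Recomputes the RD multiplier from the effective quantizer of this block:
// the frame base qindex plus the superblock delta qindex and the luma DC
// delta.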
static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
  const AV1_COMMON *const cm = &cpi->common;
  const CommonQuantParams *quant_params = &cm->quant_params;
  return av1_compute_rd_mult(cpi, quant_params->base_qindex + xd->delta_qindex +
                                      quant_params->y_dc_delta_q);
}

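// Scales the block's rdmult by the geometric mean of the per-16x16 SSIM
// scaling factors the block covers. Note that the loops below index rows
// with num_mi_w and columns with num_mi_h; this is harmless only because
// the 16x16 base unit is square, so the two strides are equal.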
static AOM_INLINE void set_ssim_rdmult(const AV1_COMP *const cpi,
                                       MACROBLOCK *const x,
                                       const BLOCK_SIZE bsize, const int mi_row,
                                       const int mi_col, int *const rdmult) {
  const AV1_COMMON *const cm = &cpi->common;

  const int bsize_base = BLOCK_16X16;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
  int row, col;
  double num_of_mi = 0.0;
  double geom_mean_of_scale = 0.0;

  assert(cpi->oxcf.tuning == AOM_TUNE_SSIM);

  aom_clear_system_state();
  for (row = mi_row / num_mi_w;
       row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
    for (col = mi_col / num_mi_h;
         col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->ssim_rdmult_scaling_factors[index]);
      num_of_mi += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);

  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
  *rdmult = AOMMAX(*rdmult, 0);
  set_error_per_bit(x, *rdmult);
  aom_clear_system_state();
}

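// TPL-based analogue of set_ssim_rdmult: scales the rdmult by the geometric
// mean of the TPL-derived per-16x16 factors covered by the block. Falls back
// to the plain delta-q rdmult whenever the TPL stats are unavailable or
// inapplicable (invalid frame, superres, or any AQ mode active).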
static int get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               const BLOCK_SIZE bsize, const int mi_row,
                               const int mi_col, int orig_rdmult) {
  const AV1_COMMON *const cm = &cpi->common;
  assert(IMPLIES(cpi->gf_group.size > 0,
                 cpi->gf_group.index < cpi->gf_group.size));
  const int tpl_idx = cpi->gf_group.index;
  const TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
  MACROBLOCKD *const xd = &x->e_mbd;
  const int deltaq_rdmult = set_deltaq_rdmult(cpi, xd);
  if (tpl_frame->is_valid == 0) return deltaq_rdmult;
  if (!is_frame_tpl_eligible((AV1_COMP *)cpi)) return deltaq_rdmult;
  if (tpl_idx >= MAX_LAG_BUFFERS) return deltaq_rdmult;
  if (cpi->superres_mode != SUPERRES_NONE) return deltaq_rdmult;
  if (cpi->oxcf.aq_mode != NO_AQ) return deltaq_rdmult;

  const int bsize_base = BLOCK_16X16;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
  int row, col;
  double base_block_count = 0.0;
  double geom_mean_of_scale = 0.0;
  aom_clear_system_state();
  for (row = mi_row / num_mi_w;
       row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
    for (col = mi_col / num_mi_h;
         col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->tpl_sb_rdmult_scaling_factors[index]);
      base_block_count += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);
  int rdmult = (int)((double)orig_rdmult * geom_mean_of_scale + 0.5);
  rdmult = AOMMAX(rdmult, 0);
  set_error_per_bit(x, rdmult);
  aom_clear_system_state();
  if (bsize == cm->seq_params.sb_size) {
    const int rdmult_sb = set_deltaq_rdmult(cpi, xd);
    assert(rdmult_sb == rdmult);
    (void)rdmult_sb;
  }
  return rdmult;
}

static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  const AV1_COMMON *const cm = &cpi->common;
  av1_init_plane_quantizers(cpi, x, segment_id);
  aom_clear_system_state();
  const int segment_qindex =
      av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
  return av1_compute_rd_mult(cpi,
                             segment_qindex + cm->quant_params.y_dc_delta_q);
}

static AOM_INLINE void setup_block_rdmult(const AV1_COMP *const cpi,
                                          MACROBLOCK *const x, int mi_row,
                                          int mi_col, BLOCK_SIZE bsize,
                                          AQ_MODE aq_mode, MB_MODE_INFO *mbmi) {
  x->rdmult = cpi->rd.RDMULT;

  if (aq_mode != NO_AQ) {
    assert(mbmi != NULL);
    if (aq_mode == VARIANCE_AQ) {
      if (cpi->vaq_refresh) {
        const int energy = bsize <= BLOCK_16X16
                               ? x->mb_energy
                               : av1_log_block_var(cpi, x, bsize);
        mbmi->segment_id = energy;
      }
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
    } else if (aq_mode == COMPLEXITY_AQ) {
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
    } else if (aq_mode == CYCLIC_REFRESH_AQ) {
      // If segment is boosted, use rdmult for that segment.
      if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
        x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
    }
  }

  const AV1_COMMON *const cm = &cpi->common;
  if (cm->delta_q_info.delta_q_present_flag &&
      !cpi->sf.rt_sf.use_nonrd_pick_mode) {
    x->rdmult = get_hier_tpl_rdmult(cpi, x, bsize, mi_row, mi_col, x->rdmult);
  }

  if (cpi->oxcf.tuning == AOM_TUNE_SSIM) {
    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }
#if CONFIG_TUNE_VMAF
  if (cpi->oxcf.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
      cpi->oxcf.tuning == AOM_TUNE_VMAF_MAX_GAIN) {
    av1_set_vmaf_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
  }
#endif
}

static AOM_INLINE void set_offsets_without_segment_id(
    const AV1_COMP *const cpi, const TileInfo *const tile, MACROBLOCK *const x,
    int mi_row, int mi_col, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  assert(bsize < BLOCK_SIZES_ALL);
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                        mi_row, mi_col);

  set_entropy_context(xd, mi_row, mi_col, num_planes);
  xd->above_txfm_context = cm->above_contexts.txfm[tile->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  // Set up destination pointers.
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
                       num_planes);

  // Set up limit values for MV components.
  // MVs beyond the range do not produce new/different prediction blocks.
  av1_set_mv_limits(&cm->mi_params, &x->mv_limits, mi_row, mi_col, mi_height,
                    mi_width, cpi->oxcf.border_in_pixels);

  set_plane_n4(xd, mi_width, mi_height, num_planes);

  // Set up distance of MB to edge of frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
                 cm->mi_params.mi_rows, cm->mi_params.mi_cols);

  // Set up source buffers.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);

  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
  xd->tile = *tile;
}

static AOM_INLINE void set_offsets(const AV1_COMP *const cpi,
                                   const TileInfo *const tile,
                                   MACROBLOCK *const x, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;

  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);

  // Setup segment ID.
  mbmi = xd->mi[0];
  mbmi->segment_id = 0;
  if (seg->enabled) {
    if (!cpi->vaq_refresh) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      mbmi->segment_id =
          map ? get_segment_id(&cm->mi_params, map, bsize, mi_row, mi_col) : 0;
    }
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
  }
}

static AOM_INLINE void update_filter_type_count(FRAME_COUNTS *counts,
                                                const MACROBLOCKD *xd,
                                                const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    ++counts->switchable_interp[ctx][filter];
  }
}

static AOM_INLINE void update_filter_type_cdf(const MACROBLOCKD *xd,
                                              const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
               SWITCHABLE_FILTERS);
  }
}

static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode,
                                                 BLOCK_SIZE bsize,
                                                 const MB_MODE_INFO *mbmi,
                                                 RD_COUNTS *rdc) {
  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
    int ref;
    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
    }
  }
}

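// Resets the block's transform size to a value consistent with the given
// tx_mode (lossless forces 4x4), propagates it to the inter tx-size array,
// and clears all transform types back to DCT_DCT.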
static AOM_INLINE void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
                                     const TX_MODE tx_mode) {
  MACROBLOCKD *const xd = &x->e_mbd;
  if (xd->lossless[mbmi->segment_id]) {
    mbmi->tx_size = TX_4X4;
  } else if (tx_mode != TX_MODE_SELECT) {
    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
  } else {
    BLOCK_SIZE bsize = mbmi->sb_type;
    TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
  }
  if (is_inter_block(mbmi)) {
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
  }
  const int stride = xd->tx_type_map_stride;
  const int bw = mi_size_wide[mbmi->sb_type];
  for (int row = 0; row < mi_size_high[mbmi->sb_type]; ++row) {
    memset(xd->tx_type_map + row * stride, DCT_DCT,
           bw * sizeof(xd->tx_type_map[0]));
  }
  av1_zero(x->blk_skip);
  x->force_skip = 0;
}

// This function will copy the best reference mode information from
// MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT.
static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
    MB_MODE_INFO_EXT *mbmi_ext,
    const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) {
  memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
  memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
  mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
  memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
         sizeof(mbmi_ext->global_mvs));
}

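// Commits the mode decision held in ctx into the frame-level state: copies
// the winning MB_MODE_INFO into the mi grid, restores the coefficient and
// context buffers picked during the search, updates segment ids for the
// active AQ mode, and (when not a dry run) accumulates counts and copies
// tx types and frame MVs into their frame-level buffers.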
static AOM_INLINE void update_state(const AV1_COMP *const cpi, ThreadData *td,
                                    const PICK_MODE_CONTEXT *const ctx,
                                    int mi_row, int mi_col, BLOCK_SIZE bsize,
                                    RUN_TYPE dry_run) {
  int i, x_idx, y;
  const AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int num_planes = av1_num_planes(cm);
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const MB_MODE_INFO *const mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = mi_size_wide[mi->sb_type];
  const int bh = mi_size_high[mi->sb_type];
  const int mis = mi_params->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  assert(mi->sb_type == bsize);

  *mi_addr = *mi;
  copy_mbmi_ext_frame_to_mbmi_ext(x->mbmi_ext, &ctx->mbmi_ext_best,
                                  av1_ref_frame_type(ctx->mic.ref_frame));

  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

  x->force_skip = ctx->rd_stats.skip;

  xd->tx_type_map = ctx->tx_type_map;
  xd->tx_type_map_stride = mi_size_wide[bsize];
  // If not dry_run, copy the transform type data into the frame level buffer.
  // Encoder will fetch tx types when writing bitstream.
  if (!dry_run) {
    const int grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
    uint8_t *const tx_type_map = mi_params->tx_type_map + grid_idx;
    const int mi_stride = mi_params->mi_stride;
    for (int blk_row = 0; blk_row < bh; ++blk_row) {
      av1_copy_array(tx_type_map + blk_row * mi_stride,
                     xd->tx_type_map + blk_row * xd->tx_type_map_stride, bw);
    }
    xd->tx_type_map = tx_type_map;
    xd->tx_type_map_stride = mi_stride;
  }

  // If segmentation in use
  if (seg->enabled) {
    // For in frame complexity AQ copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          map ? get_segment_id(mi_params, map, bsize, mi_row, mi_col) : 0;
      reset_tx_size(x, mi_addr, x->tx_mode_search_type);
    }
    // Else for cyclic refresh mode update the segment map, set the segment id
    // and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
                                        ctx->rd_stats.rate, ctx->rd_stats.dist,
                                        x->force_skip);
    }
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;
  }

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
  // Restore the coding context of the MB to what was in place when the mode
  // was picked for it.
  for (y = 0; y < mi_height; y++) {
    for (x_idx = 0; x_idx < mi_width; x_idx++) {
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }
    }
  }

  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);

  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
    if (frame_is_intra_only(cm)) {
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH, /*SMOOTH_PRED*/
        THR_SMOOTH_V, /*SMOOTH_V_PRED*/
        THR_SMOOTH_H, /*SMOOTH_H_PRED*/
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode chosen as best
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mi_addr)) {
      // TODO(sarahparker): global motion stats need to be handled per-tile
      // to be compatible with tile-based threading.
      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
    }

    if (cm->features.interp_filter == SWITCHABLE &&
        mi_addr->motion_mode != WARPED_CAUSAL &&
        !is_nontrans_global_motion(xd, xd->mi[0])) {
      update_filter_type_count(td->counts, xd, mi_addr);
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
  }

  const int x_mis = AOMMIN(bw, mi_params->mi_cols - mi_col);
  const int y_mis = AOMMIN(bh, mi_params->mi_rows - mi_row);
  if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
    av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}

void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

static EdgeInfo edge_info(const struct buf_2d *ref, const BLOCK_SIZE bsize,
                          const bool high_bd, const int bd) {
  const int width = block_size_wide[bsize];
  const int height = block_size_high[bsize];
  // Implementation requires width to be a multiple of 8. It also requires
  // height to be a multiple of 4, but this is always the case.
  assert(height % 4 == 0);
  if (width % 8 != 0) {
    EdgeInfo ei = { .magnitude = 0, .x = 0, .y = 0 };
    return ei;
  }
  return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
}

static int use_pb_simple_motion_pred_sse(const AV1_COMP *const cpi) {
  // TODO(debargha, yuec): Not in use, need to implement a speed feature
  // utilizing this data point, and replace '0' by the corresponding speed
  // feature flag.
  return 0 && !frame_is_intra_only(&cpi->common);
}

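// For real-time (CBR) encoding with the hybrid_intra_pickmode speed feature,
// large blocks use the fast non-RD intra picker while small blocks fall back
// to the full RD intra search.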
static void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
                                     RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  // TODO(jianj): Investigate the failure of ScalabilityTest in AOM_Q mode,
  // which sets base_qindex to 0 on keyframe.
  if (cpi->oxcf.rc_mode != AOM_CBR || !cpi->sf.rt_sf.hybrid_intra_pickmode ||
      bsize < BLOCK_16X16)
    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  else
    av1_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
}

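// Per-block mode decision entry point: sets up block offsets and buffers,
// computes source variance and edge strength, adjusts the rdmult for the
// active AQ/tuning mode, then dispatches to the intra or inter mode search
// (RD or non-RD variant) and records the resulting rate/distortion in ctx.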
static AOM_INLINE void pick_sb_modes(AV1_COMP *const cpi,
                                     TileDataEnc *tile_data,
                                     MACROBLOCK *const x, int mi_row,
                                     int mi_col, RD_STATS *rd_cost,
                                     PARTITION_TYPE partition, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx, RD_STATS best_rd,
                                     int pick_mode_type) {
  if (best_rd.rdcost < 0) {
    ctx->rd_stats.rdcost = INT64_MAX;
    ctx->rd_stats.skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

  set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);

  if (ctx->rd_mode_is_ready) {
    assert(ctx->mic.sb_type == bsize);
    assert(ctx->mic.partition == partition);
    rd_cost->rate = ctx->rd_stats.rate;
    rd_cost->dist = ctx->rd_stats.dist;
    rd_cost->rdcost = ctx->rd_stats.rdcost;
    return;
  }

  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  int i;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  aom_clear_system_state();

  mbmi = xd->mi[0];
  mbmi->sb_type = bsize;
  mbmi->partition = partition;

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  xd->tx_type_map = x->tx_type_map;
  xd->tx_type_map_stride = mi_size_wide[bsize];

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  ctx->skippable = 0;
  // Set to zero to make sure we do not use the previous encoded frame stats
  mbmi->skip = 0;
  // Reset skip mode flag.
  mbmi->skip_mode = 0;

  if (is_cur_buf_hbd(xd)) {
    x->source_variance = av1_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  if (use_pb_simple_motion_pred_sse(cpi)) {
    const FULLPEL_MV start_mv = kZeroFullMv;
    unsigned int var = 0;
    av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, start_mv, 0,
                              &x->simple_motion_pred_sse, &var);
  }

  // If the threshold for disabling wedge search is zero, it means the feature
  // should not be used. Use a value that will always succeed in the check.
  if (cpi->sf.inter_sf.disable_wedge_search_edge_thresh == 0) {
    x->edge_strength = UINT16_MAX;
    x->edge_strength_x = UINT16_MAX;
    x->edge_strength_y = UINT16_MAX;
  } else {
    EdgeInfo ei =
        edge_info(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
    x->edge_strength = ei.magnitude;
    x->edge_strength_x = ei.x;
    x->edge_strength_y = ei.y;
  }

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Save rdmult before it might be changed, so it can be restored later.
  const int orig_rdmult = x->rdmult;
  setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
  // Set error per bit for current rdmult
  set_error_per_bit(x, x->rdmult);
  av1_rd_cost_update(x->rdmult, &best_rd);

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    switch (pick_mode_type) {
      case PICK_MODE_RD:
        av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
        break;
      case PICK_MODE_NONRD:
        hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
        break;
      default: assert(0 && "Unknown pick mode type.");
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd.rdcost);
    } else {
      // TODO(kyslov): do the same for pick_inter_mode_sb_seg_skip
      switch (pick_mode_type) {
        case PICK_MODE_RD:
          av1_rd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx,
                                    best_rd.rdcost);
          break;
        case PICK_MODE_NONRD:
          av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, rd_cost, bsize, ctx,
                                       best_rd.rdcost);
          break;
        default: assert(0 && "Unknown pick mode type.");
      }
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if (rd_cost->rate != INT_MAX && aq_mode == COMPLEXITY_AQ &&
      bsize >= BLOCK_16X16) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handling.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rd_stats.rate = rd_cost->rate;
  ctx->rd_stats.dist = rd_cost->dist;
  ctx->rd_stats.rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}

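// Updates the cascaded binary CDFs for single-reference inter modes: first
// NEWMV vs. the rest, then GLOBALMV vs. the rest, then NEARESTMV vs. NEARMV,
// each under a context extracted from the packed mode_context bits.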
static AOM_INLINE void update_inter_mode_stats(FRAME_CONTEXT *fc,
                                               FRAME_COUNTS *counts,
                                               PREDICTION_MODE mode,
                                               int16_t mode_context) {
  (void)counts;

  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][0];
#endif
    update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
    return;
  }

#if CONFIG_ENTROPY_STATS
  ++counts->newmv_mode[mode_ctx][1];
#endif
  update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);

  mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->zeromv_mode[mode_ctx][0];
#endif
    update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
    return;
  }

#if CONFIG_ENTROPY_STATS
  ++counts->zeromv_mode[mode_ctx][1];
#endif
  update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);

  mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
#endif
  update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
}

static AOM_INLINE void update_palette_cdf(MACROBLOCKD *xd,
                                          const MB_MODE_INFO *const mbmi,
                                          FRAME_COUNTS *counts) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);

  (void)counts;

  if (mbmi->mode == DC_PRED) {
    const int n = pmi->palette_size[0];
    const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
#endif
    update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
               n > 0, 2);
    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
                 n - PALETTE_MIN_SIZE, PALETTE_SIZES);
    }
  }

  if (mbmi->uv_mode == UV_DC_PRED) {
    const int n = pmi->palette_size[1];
    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
#endif
    update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);

    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
                 n - PALETTE_MIN_SIZE, PALETTE_SIZES);
    }
  }
}

static AOM_INLINE void sum_intra_stats(const AV1_COMMON *const cm,
                                       FRAME_COUNTS *counts, MACROBLOCKD *xd,
                                       const MB_MODE_INFO *const mbmi,
                                       const MB_MODE_INFO *above_mi,
                                       const MB_MODE_INFO *left_mi,
                                       const int intraonly) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const PREDICTION_MODE y_mode = mbmi->mode;
  (void)counts;
  const BLOCK_SIZE bsize = mbmi->sb_type;

  if (intraonly) {
#if CONFIG_ENTROPY_STATS
    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
    const int above_ctx = intra_mode_context[above];
    const int left_ctx = intra_mode_context[left];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
  }

  if (av1_filter_intra_allowed(cm, mbmi)) {
    const int use_filter_intra_mode =
        mbmi->filter_intra_mode_info.use_filter_intra;
#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
    if (use_filter_intra_mode) {
      ++counts
            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
    }
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode, 2);
    if (use_filter_intra_mode) {
      update_cdf(fc->filter_intra_mode_cdf,
                 mbmi->filter_intra_mode_info.filter_intra_mode,
                 FILTER_INTRA_MODES);
    }
  }
  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[mbmi->mode - V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
#endif
    update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
               mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,
               2 * MAX_ANGLE_DELTA + 1);
  }

  if (!xd->is_chroma_ref) return;

  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
  const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[cfl_allowed][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
             UV_INTRA_MODES - !cfl_allowed);
  if (uv_mode == UV_CFL_PRED) {
    const int8_t joint_sign = mbmi->cfl_alpha_signs;
    const uint8_t idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[joint_sign];
#endif
    update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
#endif
      update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
    }
    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
#endif
      update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
    }
  }
  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_mode - UV_V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
#endif
    update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],
               mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
               2 * MAX_ANGLE_DELTA + 1);
  }
  if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
    update_palette_cdf(xd, mbmi, counts);
  }
}

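// After a block is finalized, updates all frame-level entropy statistics and
// CDFs that depend on the chosen mode: skip flags, delta q/lf (stats only),
// intra mode stats, reference-frame signaling, inter-intra and compound
// types, motion modes, interpolation filters, DRL indices and MV residuals.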
static AOM_INLINE void update_stats(const AV1_COMMON *const cm,
                                    ThreadData *td) {
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const int seg_ref_active =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);

  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
      is_comp_ref_allowed(bsize)) {
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
#endif
    update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
  }

  if (!mbmi->skip_mode && !seg_ref_active) {
    const int skip_ctx = av1_get_skip_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip[skip_ctx][mbmi->skip]++;
#endif
    update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
  }

#if CONFIG_ENTROPY_STATS
  // delta quant applies to both intra and inter
  const int super_block_upper_left =
      ((xd->mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
      ((xd->mi_col & (cm->seq_params.mib_size - 1)) == 0);
  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  if (delta_q_info->delta_q_present_flag &&
      (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
      super_block_upper_left) {
    const int dq =
        (mbmi->current_qindex - xd->current_qindex) / delta_q_info->delta_q_res;
    const int absdq = abs(dq);
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
      td->counts->delta_q[i][1]++;
    }
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
    if (delta_q_info->delta_lf_present_flag) {
      if (delta_q_info->delta_lf_multi) {
        const int frame_lf_count =
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
                               delta_q_info->delta_lf_res;
          const int abs_delta_lf = abs(delta_lf);
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
            td->counts->delta_lf_multi[lf_id][i][1]++;
          }
          if (abs_delta_lf < DELTA_LF_SMALL)
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
        }
      } else {
        const int delta_lf =
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
            delta_q_info->delta_lf_res;
        const int abs_delta_lf = abs(delta_lf);
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
          td->counts->delta_lf[i][1]++;
        }
        if (abs_delta_lf < DELTA_LF_SMALL)
          td->counts->delta_lf[abs_delta_lf][0]++;
      }
    }
  }
#endif

  if (!is_inter_block(mbmi)) {
    sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
                    frame_is_intra_only(cm));
  }

  if (av1_allow_intrabc(cm)) {
    update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
#if CONFIG_ENTROPY_STATS
    ++td->counts->intrabc[is_intrabc_block(mbmi)];
#endif  // CONFIG_ENTROPY_STATS
  }

  if (frame_is_intra_only(cm) || mbmi->skip_mode) return;

  FRAME_COUNTS *const counts = td->counts;
  const int inter_block = is_inter_block(mbmi);

  if (!seg_ref_active) {
#if CONFIG_ENTROPY_STATS
    counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
#endif
    update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
               inter_block, 2);
    // If the segment reference feature is enabled, we have only a single
    // reference frame allowed for the segment, so exclude it from the
    // reference frame counts used to work out probabilities.
1183 if (inter_block) {
1184 const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
1185 const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
1186 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1187 if (is_comp_ref_allowed(bsize)) {
1188 #if CONFIG_ENTROPY_STATS
1189 counts->comp_inter[av1_get_reference_mode_context(xd)]
1190 [has_second_ref(mbmi)]++;
1191 #endif // CONFIG_ENTROPY_STATS
1192 update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), 2);
1193 }
1194 }
1195
1196 if (has_second_ref(mbmi)) {
1197 const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
1198 ? UNIDIR_COMP_REFERENCE
1199 : BIDIR_COMP_REFERENCE;
1200 update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
1201 COMP_REFERENCE_TYPES);
1202 #if CONFIG_ENTROPY_STATS
1203 counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
1204 [comp_ref_type]++;
1205 #endif // CONFIG_ENTROPY_STATS
1206
1207 if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
1208 const int bit = (ref0 == BWDREF_FRAME);
1209 update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
1210 #if CONFIG_ENTROPY_STATS
1211 counts
1212 ->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0][bit]++;
1213 #endif // CONFIG_ENTROPY_STATS
1214 if (!bit) {
1215 const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
1216 update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
1217 #if CONFIG_ENTROPY_STATS
1218 counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
1219 [bit1]++;
1220 #endif // CONFIG_ENTROPY_STATS
1221 if (bit1) {
1222 update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
1223 ref1 == GOLDEN_FRAME, 2);
1224 #if CONFIG_ENTROPY_STATS
1225 counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)][2]
1226 [ref1 == GOLDEN_FRAME]++;
1227 #endif // CONFIG_ENTROPY_STATS
1228 }
1229 }
1230 } else {
1231 const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
1232 update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
1233 #if CONFIG_ENTROPY_STATS
1234 counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
1235 #endif // CONFIG_ENTROPY_STATS
1236 if (!bit) {
1237 update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), ref0 == LAST2_FRAME,
1238 2);
1239 #if CONFIG_ENTROPY_STATS
1240 counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
1241 [ref0 == LAST2_FRAME]++;
1242 #endif // CONFIG_ENTROPY_STATS
1243 } else {
1244 update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), ref0 == GOLDEN_FRAME,
1245 2);
1246 #if CONFIG_ENTROPY_STATS
1247 counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
1248 [ref0 == GOLDEN_FRAME]++;
1249 #endif // CONFIG_ENTROPY_STATS
1250 }
1251 update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), ref1 == ALTREF_FRAME,
1252 2);
1253 #if CONFIG_ENTROPY_STATS
1254 counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
1255 [ref1 == ALTREF_FRAME]++;
1256 #endif // CONFIG_ENTROPY_STATS
1257 if (ref1 != ALTREF_FRAME) {
1258 update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
1259 ref1 == ALTREF2_FRAME, 2);
1260 #if CONFIG_ENTROPY_STATS
1261 counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
1262 [ref1 == ALTREF2_FRAME]++;
1263 #endif // CONFIG_ENTROPY_STATS
1264 }
1265 }
1266 } else {
1267 const int bit = (ref0 >= BWDREF_FRAME);
1268 update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
1269 #if CONFIG_ENTROPY_STATS
1270 counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
1271 #endif // CONFIG_ENTROPY_STATS
1272 if (bit) {
1273 assert(ref0 <= ALTREF_FRAME);
1274 update_cdf(av1_get_pred_cdf_single_ref_p2(xd), ref0 == ALTREF_FRAME,
1275 2);
1276 #if CONFIG_ENTROPY_STATS
1277 counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
1278 [ref0 == ALTREF_FRAME]++;
1279 #endif // CONFIG_ENTROPY_STATS
1280 if (ref0 != ALTREF_FRAME) {
1281 update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
1282 ref0 == ALTREF2_FRAME, 2);
1283 #if CONFIG_ENTROPY_STATS
1284 counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
1285 [ref0 == ALTREF2_FRAME]++;
1286 #endif // CONFIG_ENTROPY_STATS
1287 }
1288 } else {
1289 const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
1290 update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
1291 #if CONFIG_ENTROPY_STATS
1292 counts->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
1293 #endif // CONFIG_ENTROPY_STATS
1294 if (!bit1) {
1295 update_cdf(av1_get_pred_cdf_single_ref_p4(xd), ref0 != LAST_FRAME,
1296 2);
1297 #if CONFIG_ENTROPY_STATS
1298 counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
1299 [ref0 != LAST_FRAME]++;
1300 #endif // CONFIG_ENTROPY_STATS
1301 } else {
1302 update_cdf(av1_get_pred_cdf_single_ref_p5(xd), ref0 != LAST3_FRAME,
1303 2);
1304 #if CONFIG_ENTROPY_STATS
1305 counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
1306 [ref0 != LAST3_FRAME]++;
1307 #endif // CONFIG_ENTROPY_STATS
1308 }
1309 }
1310 }
1311
1312 if (cm->seq_params.enable_interintra_compound &&
1313 is_interintra_allowed(mbmi)) {
1314 const int bsize_group = size_group_lookup[bsize];
1315 if (mbmi->ref_frame[1] == INTRA_FRAME) {
1316 #if CONFIG_ENTROPY_STATS
1317 counts->interintra[bsize_group][1]++;
1318 #endif
1319 update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
1320 #if CONFIG_ENTROPY_STATS
1321 counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
1322 #endif
1323 update_cdf(fc->interintra_mode_cdf[bsize_group],
1324 mbmi->interintra_mode, INTERINTRA_MODES);
1325 if (av1_is_wedge_used(bsize)) {
1326 #if CONFIG_ENTROPY_STATS
1327 counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
1328 #endif
1329 update_cdf(fc->wedge_interintra_cdf[bsize],
1330 mbmi->use_wedge_interintra, 2);
1331 if (mbmi->use_wedge_interintra) {
1332 #if CONFIG_ENTROPY_STATS
1333 counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
1334 #endif
1335 update_cdf(fc->wedge_idx_cdf[bsize], mbmi->interintra_wedge_index,
1336 16);
1337 }
1338 }
1339 } else {
1340 #if CONFIG_ENTROPY_STATS
1341 counts->interintra[bsize_group][0]++;
1342 #endif
1343 update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
1344 }
1345 }
1346
1347 const MOTION_MODE motion_allowed =
1348 cm->features.switchable_motion_mode
1349 ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1350 cm->features.allow_warped_motion)
1351 : SIMPLE_TRANSLATION;
1352 if (mbmi->ref_frame[1] != INTRA_FRAME) {
1353 if (motion_allowed == WARPED_CAUSAL) {
1354 #if CONFIG_ENTROPY_STATS
1355 counts->motion_mode[bsize][mbmi->motion_mode]++;
1356 #endif
1357 update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
1358 MOTION_MODES);
1359 } else if (motion_allowed == OBMC_CAUSAL) {
1360 #if CONFIG_ENTROPY_STATS
1361 counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1362 #endif
1363 update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, 2);
1364 }
1365 }
1366
1367 if (has_second_ref(mbmi)) {
1368 assert(current_frame->reference_mode != SINGLE_REFERENCE &&
1369 is_inter_compound_mode(mbmi->mode) &&
1370 mbmi->motion_mode == SIMPLE_TRANSLATION);
1371
1372 const int masked_compound_used = is_any_masked_compound_used(bsize) &&
1373 cm->seq_params.enable_masked_compound;
1374 if (masked_compound_used) {
1375 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
1376 #if CONFIG_ENTROPY_STATS
1377 ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
1378 #endif
1379 update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
1380 mbmi->comp_group_idx, 2);
1381 }
1382
1383 if (mbmi->comp_group_idx == 0) {
1384 const int comp_index_ctx = get_comp_index_context(cm, xd);
1385 #if CONFIG_ENTROPY_STATS
1386 ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
1387 #endif
1388 update_cdf(fc->compound_index_cdf[comp_index_ctx], mbmi->compound_idx,
1389 2);
1390 } else {
1391 assert(masked_compound_used);
1392 if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1393 #if CONFIG_ENTROPY_STATS
1394 ++counts->compound_type[bsize][mbmi->interinter_comp.type -
1395 COMPOUND_WEDGE];
1396 #endif
1397 update_cdf(fc->compound_type_cdf[bsize],
1398 mbmi->interinter_comp.type - COMPOUND_WEDGE,
1399 MASKED_COMPOUND_TYPES);
1400 }
1401 }
1402 }
1403 if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
1404 if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1405 #if CONFIG_ENTROPY_STATS
1406 counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
1407 #endif
1408 update_cdf(fc->wedge_idx_cdf[bsize],
1409 mbmi->interinter_comp.wedge_index, 16);
1410 }
1411 }
1412 }
1413 }
1414
1415 if (inter_block && cm->features.interp_filter == SWITCHABLE &&
1416 mbmi->motion_mode != WARPED_CAUSAL &&
1417 !is_nontrans_global_motion(xd, mbmi)) {
1418 update_filter_type_cdf(xd, mbmi);
1419 }
1420 if (inter_block &&
1421 !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1422 const PREDICTION_MODE mode = mbmi->mode;
1423 const int16_t mode_ctx =
1424 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1425 if (has_second_ref(mbmi)) {
1426 #if CONFIG_ENTROPY_STATS
1427 ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
1428 #endif
1429 update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
1430 INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
1431 } else {
1432 update_inter_mode_stats(fc, counts, mode, mode_ctx);
1433 }
1434
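// The dynamic reference list (DRL) index is coded as a sequence of binary
// decisions: for each candidate slot idx, one bin signals whether
// ref_mv_idx differs from that slot, stopping at the first match. NEWMV
// modes scan slots 0..1; NEARMV-style modes scan slots 1..2, offset by 1.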
1435 const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
1436 if (new_mv) {
1437 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1438 for (int idx = 0; idx < 2; ++idx) {
1439 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1440 const uint8_t drl_ctx =
1441 av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1442 update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx, 2);
1443 #if CONFIG_ENTROPY_STATS
1444 ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
1445 #endif
1446 if (mbmi->ref_mv_idx == idx) break;
1447 }
1448 }
1449 }
1450
1451 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1452 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1453 for (int idx = 1; idx < 3; ++idx) {
1454 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1455 const uint8_t drl_ctx =
1456 av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1457 update_cdf(fc->drl_cdf[drl_ctx], mbmi->ref_mv_idx != idx - 1, 2);
1458 #if CONFIG_ENTROPY_STATS
1459 ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
1460 #endif
1461 if (mbmi->ref_mv_idx == idx - 1) break;
1462 }
1463 }
1464 }
1465 if (have_newmv_in_inter_mode(mbmi->mode)) {
1466 const int allow_hp = cm->features.cur_frame_force_integer_mv
1467 ? MV_SUBPEL_NONE
1468 : cm->features.allow_high_precision_mv;
1469 if (new_mv) {
1470 for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
1471 const int_mv ref_mv = av1_get_ref_mv(x, ref);
1472 av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1473 allow_hp);
1474 }
1475 } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAR_NEWMV) {
1476 const int ref = 1;
1477 const int_mv ref_mv = av1_get_ref_mv(x, ref);
1478 av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1479 allow_hp);
1480 } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEW_NEARMV) {
1481 const int ref = 0;
1482 const int_mv ref_mv = av1_get_ref_mv(x, ref);
1483 av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
1484 allow_hp);
1485 }
1486 }
1487 }
1488 }
1489
1490 static AOM_INLINE void restore_context(MACROBLOCK *x,
1491 const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
1492 int mi_row, int mi_col, BLOCK_SIZE bsize,
1493 const int num_planes) {
1494 MACROBLOCKD *xd = &x->e_mbd;
1495 int p;
1496 const int num_4x4_blocks_wide = mi_size_wide[bsize];
1497 const int num_4x4_blocks_high = mi_size_high[bsize];
1498 int mi_width = mi_size_wide[bsize];
1499 int mi_height = mi_size_high[bsize];
1500 for (p = 0; p < num_planes; p++) {
1501 int tx_col = mi_col;
1502 int tx_row = mi_row & MAX_MIB_MASK;
1503 memcpy(
1504 xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x),
1505 ctx->a + num_4x4_blocks_wide * p,
1506 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
1507 xd->plane[p].subsampling_x);
1508 memcpy(xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y),
1509 ctx->l + num_4x4_blocks_high * p,
1510 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
1511 xd->plane[p].subsampling_y);
1512 }
1513 memcpy(xd->above_partition_context + mi_col, ctx->sa,
1514 sizeof(*xd->above_partition_context) * mi_width);
1515 memcpy(xd->left_partition_context + (mi_row & MAX_MIB_MASK), ctx->sl,
1516 sizeof(xd->left_partition_context[0]) * mi_height);
1517 xd->above_txfm_context = ctx->p_ta;
1518 xd->left_txfm_context = ctx->p_tl;
1519 memcpy(xd->above_txfm_context, ctx->ta,
1520 sizeof(*xd->above_txfm_context) * mi_width);
1521 memcpy(xd->left_txfm_context, ctx->tl,
1522 sizeof(*xd->left_txfm_context) * mi_height);
1523 }
1524
1525 static AOM_INLINE void save_context(const MACROBLOCK *x,
1526 RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
1527 int mi_row, int mi_col, BLOCK_SIZE bsize,
1528 const int num_planes) {
1529 const MACROBLOCKD *xd = &x->e_mbd;
1530 int p;
1531 int mi_width = mi_size_wide[bsize];
1532 int mi_height = mi_size_high[bsize];
1533
1534 // Buffer the above/left context information of the block in search.
1535 for (p = 0; p < num_planes; ++p) {
1536 int tx_col = mi_col;
1537 int tx_row = mi_row & MAX_MIB_MASK;
1538 memcpy(
1539 ctx->a + mi_width * p,
1540 xd->above_entropy_context[p] + (tx_col >> xd->plane[p].subsampling_x),
1541 (sizeof(ENTROPY_CONTEXT) * mi_width) >> xd->plane[p].subsampling_x);
1542 memcpy(ctx->l + mi_height * p,
1543 xd->left_entropy_context[p] + (tx_row >> xd->plane[p].subsampling_y),
1544 (sizeof(ENTROPY_CONTEXT) * mi_height) >> xd->plane[p].subsampling_y);
1545 }
1546 memcpy(ctx->sa, xd->above_partition_context + mi_col,
1547 sizeof(*xd->above_partition_context) * mi_width);
1548 memcpy(ctx->sl, xd->left_partition_context + (mi_row & MAX_MIB_MASK),
1549 sizeof(xd->left_partition_context[0]) * mi_height);
1550 memcpy(ctx->ta, xd->above_txfm_context,
1551 sizeof(*xd->above_txfm_context) * mi_width);
1552 memcpy(ctx->tl, xd->left_txfm_context,
1553 sizeof(*xd->left_txfm_context) * mi_height);
1554 ctx->p_ta = xd->above_txfm_context;
1555 ctx->p_tl = xd->left_txfm_context;
1556 }
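
// Typical usage around a candidate partition search (a sketch; see
// rd_use_partition() and rd_pick_partition() below for real call sites):
//   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
//   save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
//   ... evaluate one partitioning candidate ...
//   restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);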
1557
1558 static AOM_INLINE void encode_b(const AV1_COMP *const cpi,
1559 TileDataEnc *tile_data, ThreadData *td,
1560 TOKENEXTRA **tp, int mi_row, int mi_col,
1561 RUN_TYPE dry_run, BLOCK_SIZE bsize,
1562 PARTITION_TYPE partition,
1563 PICK_MODE_CONTEXT *const ctx, int *rate) {
1564 TileInfo *const tile = &tile_data->tile_info;
1565 MACROBLOCK *const x = &td->mb;
1566 MACROBLOCKD *xd = &x->e_mbd;
1567
1568 set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
1569 const int origin_mult = x->rdmult;
1570 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1571 MB_MODE_INFO *mbmi = xd->mi[0];
1572 mbmi->partition = partition;
1573 update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
1574
1575 if (!dry_run) {
1576 x->mbmi_ext_frame->cb_offset = x->cb_offset;
1577 assert(x->cb_offset <
1578 (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
1579 }
1580
1581 encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
1582
1583 if (!dry_run) {
1584 const AV1_COMMON *const cm = &cpi->common;
1585 x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
1586 if (bsize == cm->seq_params.sb_size && mbmi->skip == 1 &&
1587 cm->delta_q_info.delta_lf_present_flag) {
1588 const int frame_lf_count =
1589 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1590 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
1591 mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
1592 mbmi->delta_lf_from_base = xd->delta_lf_from_base;
1593 }
1594 if (has_second_ref(mbmi)) {
1595 if (mbmi->compound_idx == 0 ||
1596 mbmi->interinter_comp.type == COMPOUND_AVERAGE)
1597 mbmi->comp_group_idx = 0;
1598 else
1599 mbmi->comp_group_idx = 1;
1600 }
1601
1602 // Delta quant applies to both intra and inter blocks.
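// It is signaled at most once per superblock, at the first coded block,
// and is omitted when an SB-sized block is coded as skip.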
1603 const int super_block_upper_left =
1604 ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
1605 ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
1606 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1607 if (delta_q_info->delta_q_present_flag &&
1608 (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
1609 super_block_upper_left) {
1610 xd->current_qindex = mbmi->current_qindex;
1611 if (delta_q_info->delta_lf_present_flag) {
1612 if (delta_q_info->delta_lf_multi) {
1613 const int frame_lf_count =
1614 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
1615 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
1616 xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
1617 }
1618 } else {
1619 xd->delta_lf_from_base = mbmi->delta_lf_from_base;
1620 }
1621 }
1622 }
1623
1624 RD_COUNTS *rdc = &td->rd_counts;
1625 if (mbmi->skip_mode) {
1626 assert(!frame_is_intra_only(cm));
1627 rdc->skip_mode_used_flag = 1;
1628 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1629 assert(has_second_ref(mbmi));
1630 rdc->compound_ref_used_flag = 1;
1631 }
1632 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1633 } else {
1634 const int seg_ref_active =
1635 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1636 if (!seg_ref_active) {
1637 // If the segment reference feature is enabled, only a single
1638 // reference frame is allowed for the segment, so exclude it from
1639 // the reference frame counts used to work out probabilities.
1640 if (is_inter_block(mbmi)) {
1641 av1_collect_neighbors_ref_counts(xd);
1642 if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
1643 if (has_second_ref(mbmi)) {
1644 // This flag is also updated for 4x4 blocks
1645 rdc->compound_ref_used_flag = 1;
1646 }
1647 }
1648 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1649 }
1650 }
1651 }
1652
1653 if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
1654
1655 // Gather OBMC and warped motion counts to update the probabilities.
1656 if ((!cpi->sf.inter_sf.disable_obmc &&
1657 cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) ||
1658 (cm->features.allow_warped_motion &&
1659 cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) {
1660 const int inter_block = is_inter_block(mbmi);
1661 const int seg_ref_active =
1662 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
1663 if (!seg_ref_active && inter_block) {
1664 const MOTION_MODE motion_allowed =
1665 cm->features.switchable_motion_mode
1666 ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1667 cm->features.allow_warped_motion)
1668 : SIMPLE_TRANSLATION;
1669
1670 if (mbmi->ref_frame[1] != INTRA_FRAME) {
1671 if (motion_allowed >= OBMC_CAUSAL) {
1672 td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1673 }
1674 if (motion_allowed == WARPED_CAUSAL) {
1675 td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
1676 }
1677 }
1678 }
1679 }
1680 }
1681 // TODO(Ravi/Remya): Move this copy function to a better logical place
1682 // This function will copy the best mode information from block
1683 // level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
1684 // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
1685 // bitstream preparation.
1686 av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, x->mbmi_ext,
1687 av1_ref_frame_type(xd->mi[0]->ref_frame));
1688 x->rdmult = origin_mult;
1689 }
1690
1691 static AOM_INLINE void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
1692 TileDataEnc *tile_data, TOKENEXTRA **tp,
1693 int mi_row, int mi_col, RUN_TYPE dry_run,
1694 BLOCK_SIZE bsize, PC_TREE *pc_tree,
1695 int *rate) {
1696 assert(bsize < BLOCK_SIZES_ALL);
1697 const AV1_COMMON *const cm = &cpi->common;
1698 const CommonModeInfoParams *const mi_params = &cm->mi_params;
1699 MACROBLOCK *const x = &td->mb;
1700 MACROBLOCKD *const xd = &x->e_mbd;
1702 const int hbs = mi_size_wide[bsize] / 2;
1703 const int is_partition_root = bsize >= BLOCK_8X8;
1704 const int ctx = is_partition_root
1705 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1706 : -1;
1707 const PARTITION_TYPE partition = pc_tree->partitioning;
1708 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1709 int quarter_step = mi_size_wide[bsize] / 4;
1710 int i;
1711 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
1712
1713 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1714
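// An explicit partition symbol is only coded when both halves of the block
// lie inside the frame; otherwise the partition is implied, so neither the
// counts nor the CDFs are updated.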
1715 if (!dry_run && ctx >= 0) {
1716 const int has_rows = (mi_row + hbs) < mi_params->mi_rows;
1717 const int has_cols = (mi_col + hbs) < mi_params->mi_cols;
1718
1719 if (has_rows && has_cols) {
1720 #if CONFIG_ENTROPY_STATS
1721 td->counts->partition[ctx][partition]++;
1722 #endif
1723
1724 if (tile_data->allow_update_cdf) {
1725 FRAME_CONTEXT *fc = xd->tile_ctx;
1726 update_cdf(fc->partition_cdf[ctx], partition,
1727 partition_cdf_length(bsize));
1728 }
1729 }
1730 }
1731
1732 switch (partition) {
1733 case PARTITION_NONE:
1734 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1735 partition, &pc_tree->none, rate);
1736 break;
1737 case PARTITION_VERT:
1738 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1739 partition, &pc_tree->vertical[0], rate);
1740 if (mi_col + hbs < mi_params->mi_cols) {
1741 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1742 partition, &pc_tree->vertical[1], rate);
1743 }
1744 break;
1745 case PARTITION_HORZ:
1746 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1747 partition, &pc_tree->horizontal[0], rate);
1748 if (mi_row + hbs < mi_params->mi_rows) {
1749 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1750 partition, &pc_tree->horizontal[1], rate);
1751 }
1752 break;
1753 case PARTITION_SPLIT:
1754 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
1755 pc_tree->split[0], rate);
1756 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
1757 pc_tree->split[1], rate);
1758 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
1759 pc_tree->split[2], rate);
1760 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
1761 subsize, pc_tree->split[3], rate);
1762 break;
1763
1764 case PARTITION_HORZ_A:
1765 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1766 partition, &pc_tree->horizontala[0], rate);
1767 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1768 partition, &pc_tree->horizontala[1], rate);
1769 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1770 partition, &pc_tree->horizontala[2], rate);
1771 break;
1772 case PARTITION_HORZ_B:
1773 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1774 partition, &pc_tree->horizontalb[0], rate);
1775 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1776 partition, &pc_tree->horizontalb[1], rate);
1777 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1778 bsize2, partition, &pc_tree->horizontalb[2], rate);
1779 break;
1780 case PARTITION_VERT_A:
1781 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1782 partition, &pc_tree->verticala[0], rate);
1783 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1784 partition, &pc_tree->verticala[1], rate);
1785 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1786 partition, &pc_tree->verticala[2], rate);
1787
1788 break;
1789 case PARTITION_VERT_B:
1790 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1791 partition, &pc_tree->verticalb[0], rate);
1792 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1793 partition, &pc_tree->verticalb[1], rate);
1794 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1795 bsize2, partition, &pc_tree->verticalb[2], rate);
1796 break;
1797 case PARTITION_HORZ_4:
1798 for (i = 0; i < 4; ++i) {
1799 int this_mi_row = mi_row + i * quarter_step;
1800 if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
1801
1802 encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
1803 partition, &pc_tree->horizontal4[i], rate);
1804 }
1805 break;
1806 case PARTITION_VERT_4:
1807 for (i = 0; i < 4; ++i) {
1808 int this_mi_col = mi_col + i * quarter_step;
1809 if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
1810 encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
1811 partition, &pc_tree->vertical4[i], rate);
1812 }
1813 break;
1814 default: assert(0 && "Invalid partition type."); break;
1815 }
1816
1817 update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
1818 }
1819
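// Fills the mode-info grid of a superblock that extends past the frame
// boundary, picking for each position the largest partition size that still
// fits within the remaining rows/columns.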
1820 static AOM_INLINE void set_partial_sb_partition(
1821 const AV1_COMMON *const cm, MB_MODE_INFO *mi, int bh_in, int bw_in,
1822 int mi_rows_remaining, int mi_cols_remaining, BLOCK_SIZE bsize,
1823 MB_MODE_INFO **mib) {
1824 int bh = bh_in;
1825 int r, c;
1826 for (r = 0; r < cm->seq_params.mib_size; r += bh) {
1827 int bw = bw_in;
1828 for (c = 0; c < cm->seq_params.mib_size; c += bw) {
1829 const int grid_index = get_mi_grid_idx(&cm->mi_params, r, c);
1830 const int mi_index = get_alloc_mi_idx(&cm->mi_params, r, c);
1831 mib[grid_index] = mi + mi_index;
1832 mib[grid_index]->sb_type = find_partition_size(
1833 bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw);
1834 }
1835 }
1836 }
1837
1838 // This function attempts to set all mode info entries in a given superblock
1839 // to the same block partition size.
1840 // However, at the bottom and right borders of the image the requested size
1841 // may not be allowed, in which case this code attempts to choose the
1842 // largest allowable partition.
1843 static AOM_INLINE void set_fixed_partitioning(AV1_COMP *cpi,
1844 const TileInfo *const tile,
1845 MB_MODE_INFO **mib, int mi_row,
1846 int mi_col, BLOCK_SIZE bsize) {
1847 AV1_COMMON *const cm = &cpi->common;
1848 const CommonModeInfoParams *const mi_params = &cm->mi_params;
1849 const int mi_rows_remaining = tile->mi_row_end - mi_row;
1850 const int mi_cols_remaining = tile->mi_col_end - mi_col;
1851 MB_MODE_INFO *const mi_upper_left =
1852 mi_params->mi_alloc + get_alloc_mi_idx(mi_params, mi_row, mi_col);
1853 int bh = mi_size_high[bsize];
1854 int bw = mi_size_wide[bsize];
1855
1856 assert(bsize >= mi_params->mi_alloc_bsize &&
1857 "Attempted to use bsize < mi_params->mi_alloc_bsize");
1858 assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
1859
1860 // Apply the requested partition size to the SB if it is all "in image"
1861 if ((mi_cols_remaining >= cm->seq_params.mib_size) &&
1862 (mi_rows_remaining >= cm->seq_params.mib_size)) {
1863 for (int block_row = 0; block_row < cm->seq_params.mib_size;
1864 block_row += bh) {
1865 for (int block_col = 0; block_col < cm->seq_params.mib_size;
1866 block_col += bw) {
1867 const int grid_index = get_mi_grid_idx(mi_params, block_row, block_col);
1868 const int mi_index = get_alloc_mi_idx(mi_params, block_row, block_col);
1869 mib[grid_index] = mi_upper_left + mi_index;
1870 mib[grid_index]->sb_type = bsize;
1871 }
1872 }
1873 } else {
1874 // Else this is a partial SB.
1875 set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
1876 mi_cols_remaining, bsize, mib);
1877 }
1878 }
1879
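// Computes the rate-distortion cost of a given, externally supplied
// partitioning (e.g. fixed or variance-based), optionally comparing it
// against PARTITION_NONE and a forced 4-way split, and encodes the winner.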
1880 static AOM_INLINE void rd_use_partition(
1881 AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MB_MODE_INFO **mib,
1882 TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int *rate,
1883 int64_t *dist, int do_recon, PC_TREE *pc_tree) {
1884 AV1_COMMON *const cm = &cpi->common;
1885 const CommonModeInfoParams *const mi_params = &cm->mi_params;
1886 const int num_planes = av1_num_planes(cm);
1887 TileInfo *const tile_info = &tile_data->tile_info;
1888 MACROBLOCK *const x = &td->mb;
1889 MACROBLOCKD *const xd = &x->e_mbd;
1890 const int bs = mi_size_wide[bsize];
1891 const int hbs = bs / 2;
1892 int i;
1893 const int pl = (bsize >= BLOCK_8X8)
1894 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1895 : 0;
1896 const PARTITION_TYPE partition =
1897 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1898 : PARTITION_NONE;
1899 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1900 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1901 RD_STATS last_part_rdc, none_rdc, chosen_rdc, invalid_rdc;
1902 BLOCK_SIZE sub_subsize = BLOCK_4X4;
1903 int splits_below = 0;
1904 BLOCK_SIZE bs_type = mib[0]->sb_type;
1905 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1906
1907 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
1908
1909 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1910
1911 av1_invalid_rd_stats(&last_part_rdc);
1912 av1_invalid_rd_stats(&none_rdc);
1913 av1_invalid_rd_stats(&chosen_rdc);
1914 av1_invalid_rd_stats(&invalid_rdc);
1915
1916 pc_tree->partitioning = partition;
1917
1918 xd->above_txfm_context =
1919 cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
1920 xd->left_txfm_context =
1921 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1922 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1923
1924 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1925 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1926 x->mb_energy = av1_log_block_var(cpi, x, bsize);
1927 }
1928
1929 // Save rdmult before it might be changed, so it can be restored later.
1930 const int orig_rdmult = x->rdmult;
1931 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
1932
1933 if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
1934 (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 2 ||
1935 (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 1 &&
1936 cm->quant_params.base_qindex > 190 && bsize <= BLOCK_32X32 &&
1937 !frame_is_intra_only(cm)))) {
1938 // Check if any of the sub blocks are further split.
1939 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
1940 sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
1941 splits_below = 1;
1942 for (i = 0; i < 4; i++) {
1943 int jj = i >> 1, ii = i & 0x01;
1944 MB_MODE_INFO *this_mi = mib[jj * hbs * mi_params->mi_stride + ii * hbs];
1945 if (this_mi && this_mi->sb_type >= sub_subsize) {
1946 splits_below = 0;
1947 }
1948 }
1949 }
1950
1951 // If partition is not none, try none unless each of the 4 sub-blocks is
1952 // split even further.
1953 if (partition != PARTITION_NONE && !splits_below &&
1954 mi_row + hbs < mi_params->mi_rows &&
1955 mi_col + hbs < mi_params->mi_cols) {
1956 pc_tree->partitioning = PARTITION_NONE;
1957 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
1958 PARTITION_NONE, bsize, ctx_none, invalid_rdc, PICK_MODE_RD);
1959
1960 if (none_rdc.rate < INT_MAX) {
1961 none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
1962 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
1963 }
1964
1965 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1966 mib[0]->sb_type = bs_type;
1967 pc_tree->partitioning = partition;
1968 }
1969 }
1970
1971 switch (partition) {
1972 case PARTITION_NONE:
1973 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1974 PARTITION_NONE, bsize, ctx_none, invalid_rdc, PICK_MODE_RD);
1975 break;
1976 case PARTITION_HORZ:
1977 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1978 PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
1979 invalid_rdc, PICK_MODE_RD);
1980 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1981 mi_row + hbs < mi_params->mi_rows) {
1982 RD_STATS tmp_rdc;
1983 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
1984 av1_init_rd_stats(&tmp_rdc);
1985 update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
1986 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
1987 NULL);
1988 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
1989 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
1990 invalid_rdc, PICK_MODE_RD);
1991 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1992 av1_invalid_rd_stats(&last_part_rdc);
1993 break;
1994 }
1995 last_part_rdc.rate += tmp_rdc.rate;
1996 last_part_rdc.dist += tmp_rdc.dist;
1997 last_part_rdc.rdcost += tmp_rdc.rdcost;
1998 }
1999 break;
2000 case PARTITION_VERT:
2001 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2002 PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rdc,
2003 PICK_MODE_RD);
2004 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2005 mi_col + hbs < mi_params->mi_cols) {
2006 RD_STATS tmp_rdc;
2007 const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
2008 av1_init_rd_stats(&tmp_rdc);
2009 update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1);
2010 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize,
2011 NULL);
2012 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
2013 PARTITION_VERT, subsize,
2014 &pc_tree->vertical[bsize > BLOCK_8X8], invalid_rdc,
2015 PICK_MODE_RD);
2016 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2017 av1_invalid_rd_stats(&last_part_rdc);
2018 break;
2019 }
2020 last_part_rdc.rate += tmp_rdc.rate;
2021 last_part_rdc.dist += tmp_rdc.dist;
2022 last_part_rdc.rdcost += tmp_rdc.rdcost;
2023 }
2024 break;
2025 case PARTITION_SPLIT:
2026 if (cpi->sf.part_sf.adjust_var_based_rd_partitioning == 1 &&
2027 none_rdc.rate < INT_MAX && none_rdc.skip == 1) {
2028 av1_invalid_rd_stats(&last_part_rdc);
2029 break;
2030 }
2031 last_part_rdc.rate = 0;
2032 last_part_rdc.dist = 0;
2033 last_part_rdc.rdcost = 0;
2034 for (i = 0; i < 4; i++) {
2035 int x_idx = (i & 1) * hbs;
2036 int y_idx = (i >> 1) * hbs;
2037 int jj = i >> 1, ii = i & 0x01;
2038 RD_STATS tmp_rdc;
2039 if ((mi_row + y_idx >= mi_params->mi_rows) ||
2040 (mi_col + x_idx >= mi_params->mi_cols))
2041 continue;
2042
2043 av1_init_rd_stats(&tmp_rdc);
2044 rd_use_partition(cpi, td, tile_data,
2045 mib + jj * hbs * mi_params->mi_stride + ii * hbs, tp,
2046 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
2047 &tmp_rdc.dist, i != 3, pc_tree->split[i]);
2048 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2049 av1_invalid_rd_stats(&last_part_rdc);
2050 break;
2051 }
2052 last_part_rdc.rate += tmp_rdc.rate;
2053 last_part_rdc.dist += tmp_rdc.dist;
2054 }
2055 break;
2056 case PARTITION_VERT_A:
2057 case PARTITION_VERT_B:
2058 case PARTITION_HORZ_A:
2059 case PARTITION_HORZ_B:
2060 case PARTITION_HORZ_4:
2061 case PARTITION_VERT_4:
2062 assert(0 && "Cannot handle extended partition types");
2063 default: assert(0); break;
2064 }
2065
2066 if (last_part_rdc.rate < INT_MAX) {
2067 last_part_rdc.rate += x->partition_cost[pl][partition];
2068 last_part_rdc.rdcost =
2069 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
2070 }
2071
2072 if ((cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION &&
2073 cpi->sf.part_sf.adjust_var_based_rd_partitioning > 2) &&
2074 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
2075 (mi_row + bs < mi_params->mi_rows ||
2076 mi_row + hbs == mi_params->mi_rows) &&
2077 (mi_col + bs < mi_params->mi_cols ||
2078 mi_col + hbs == mi_params->mi_cols)) {
2079 BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2080 chosen_rdc.rate = 0;
2081 chosen_rdc.dist = 0;
2082
2083 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2084 pc_tree->partitioning = PARTITION_SPLIT;
2085
2086 // Split partition.
2087 for (i = 0; i < 4; i++) {
2088 int x_idx = (i & 1) * hbs;
2089 int y_idx = (i >> 1) * hbs;
2090 RD_STATS tmp_rdc;
2091
2092 if ((mi_row + y_idx >= mi_params->mi_rows) ||
2093 (mi_col + x_idx >= mi_params->mi_cols))
2094 continue;
2095
2096 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2097 pc_tree->split[i]->partitioning = PARTITION_NONE;
2098 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
2099 PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
2100 invalid_rdc, PICK_MODE_RD);
2101
2102 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2103 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2104 av1_invalid_rd_stats(&chosen_rdc);
2105 break;
2106 }
2107
2108 chosen_rdc.rate += tmp_rdc.rate;
2109 chosen_rdc.dist += tmp_rdc.dist;
2110
2111 if (i != 3)
2112 encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
2113 OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
2114
2115 chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2116 }
2117 if (chosen_rdc.rate < INT_MAX) {
2118 chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2119 chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
2120 }
2121 }
2122
2123 // If last_part is better set the partitioning to that.
2124 if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
2125 mib[0]->sb_type = bsize;
2126 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
2127 chosen_rdc = last_part_rdc;
2128 }
2129 // If none was better set the partitioning to that.
2130 if (none_rdc.rdcost < chosen_rdc.rdcost) {
2131 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2132 chosen_rdc = none_rdc;
2133 }
2134
2135 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2136
2137 // We must have chosen a partitioning and encoding or we'll fail later on.
2138 // No other opportunities for success.
2139 if (bsize == cm->seq_params.sb_size)
2140 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2141
2142 if (do_recon) {
2143 if (bsize == cm->seq_params.sb_size) {
2144 // NOTE: To get an estimate of the rate due to the tokens, use:
2145 // int rate_coeffs = 0;
2146 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
2147 // bsize, pc_tree, &rate_coeffs);
2148 x->cb_offset = 0;
2149 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
2150 pc_tree, NULL);
2151 } else {
2152 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2153 pc_tree, NULL);
2154 }
2155 }
2156
2157 *rate = chosen_rdc.rate;
2158 *dist = chosen_rdc.dist;
2159 x->rdmult = orig_rdmult;
2160 }
2161
2162 static int is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col,
2163 BLOCK_SIZE bsize) {
2164 const int bs = mi_size_wide[bsize];
2165 const int hbs = bs / 2;
2166 assert(bsize >= BLOCK_8X8);
2167 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2168
2169 for (int i = 0; i < 4; i++) {
2170 int x_idx = (i & 1) * hbs;
2171 int y_idx = (i >> 1) * hbs;
2172 if ((mi_row + y_idx >= cm->mi_params.mi_rows) ||
2173 (mi_col + x_idx >= cm->mi_params.mi_cols))
2174 return 0;
2175 if (get_partition(cm, mi_row + y_idx, mi_col + x_idx, subsize) !=
2176 PARTITION_NONE &&
2177 subsize != BLOCK_8X8)
2178 return 0;
2179 }
2180 return 1;
2181 }
2182
2183 static AOM_INLINE int do_split_check(BLOCK_SIZE bsize) {
2184 return (bsize == BLOCK_16X16 || bsize == BLOCK_32X32);
2185 }
2186
2187 static AOM_INLINE void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
2188 TileDataEnc *tile_data,
2189 MB_MODE_INFO **mib, TOKENEXTRA **tp,
2190 int mi_row, int mi_col,
2191 BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2192 AV1_COMMON *const cm = &cpi->common;
2193 const CommonModeInfoParams *const mi_params = &cm->mi_params;
2194 TileInfo *const tile_info = &tile_data->tile_info;
2195 MACROBLOCK *const x = &td->mb;
2196 MACROBLOCKD *const xd = &x->e_mbd;
2197 // Only square blocks from 8x8 to 128x128 are supported
2198 assert(bsize >= BLOCK_8X8 && bsize <= BLOCK_128X128);
2199 const int bs = mi_size_wide[bsize];
2200 const int hbs = bs / 2;
2201 const PARTITION_TYPE partition =
2202 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
2203 : PARTITION_NONE;
2204 BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
2205 assert(subsize <= BLOCK_LARGEST);
2206 const int pl = (bsize >= BLOCK_8X8)
2207 ? partition_plane_context(xd, mi_row, mi_col, bsize)
2208 : 0;
2209
2210 RD_STATS dummy_cost;
2211 av1_invalid_rd_stats(&dummy_cost);
2212 RD_STATS invalid_rd;
2213 av1_invalid_rd_stats(&invalid_rd);
2214
2215 if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
2216
2217 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2218
2219 pc_tree->partitioning = partition;
2220
2221 xd->above_txfm_context =
2222 cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2223 xd->left_txfm_context =
2224 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2225
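// With the rt speed features below, leaf PARTITION_NONE and PARTITION_SPLIT
// decisions may be re-evaluated against each other using the non-RD pick
// modes, keeping whichever has the lower RD cost.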
2226 switch (partition) {
2227 case PARTITION_NONE:
2228 if (cpi->sf.rt_sf.nonrd_check_partition_split && do_split_check(bsize) &&
2229 !frame_is_intra_only(cm)) {
2230 RD_STATS split_rdc, none_rdc, block_rdc;
2231 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2232
2233 av1_init_rd_stats(&split_rdc);
2234 av1_invalid_rd_stats(&none_rdc);
2235
2236 save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2237 subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2238 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
2239 PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2240 PICK_MODE_NONRD);
2241 none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2242 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2243 restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2244
2245 for (int i = 0; i < 4; i++) {
2246 av1_invalid_rd_stats(&block_rdc);
2247 const int x_idx = (i & 1) * hbs;
2248 const int y_idx = (i >> 1) * hbs;
2249 if (mi_row + y_idx >= mi_params->mi_rows ||
2250 mi_col + x_idx >= mi_params->mi_cols)
2251 continue;
2252 xd->above_txfm_context =
2253 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2254 xd->left_txfm_context =
2255 xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
2256 pc_tree->split[i]->partitioning = PARTITION_NONE;
2257 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2258 &block_rdc, PARTITION_NONE, subsize,
2259 &pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
2260 split_rdc.rate += block_rdc.rate;
2261 split_rdc.dist += block_rdc.dist;
2262
2263 encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
2264 subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2265 }
2266 split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2267 split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2268 restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2269
2270 if (none_rdc.rdcost < split_rdc.rdcost) {
2271 mib[0]->sb_type = bsize;
2272 pc_tree->partitioning = PARTITION_NONE;
2273 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2274 &pc_tree->none, NULL);
2275 } else {
2276 mib[0]->sb_type = subsize;
2277 pc_tree->partitioning = PARTITION_SPLIT;
2278 for (int i = 0; i < 4; i++) {
2279 const int x_idx = (i & 1) * hbs;
2280 const int y_idx = (i >> 1) * hbs;
2281 if (mi_row + y_idx >= mi_params->mi_rows ||
2282 mi_col + x_idx >= mi_params->mi_cols)
2283 continue;
2284
2285 encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
2286 subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2287 }
2288 }
2289
2290 } else {
2291 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2292 PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2293 PICK_MODE_NONRD);
2294 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2295 &pc_tree->none, NULL);
2296 }
2297 break;
2298 case PARTITION_VERT:
2299 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2300 PARTITION_VERT, subsize, &pc_tree->vertical[0], invalid_rd,
2301 PICK_MODE_NONRD);
2302 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2303 PARTITION_VERT, &pc_tree->vertical[0], NULL);
2304 if (mi_col + hbs < mi_params->mi_cols && bsize > BLOCK_8X8) {
2305 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &dummy_cost,
2306 PARTITION_VERT, subsize, &pc_tree->vertical[1],
2307 invalid_rd, PICK_MODE_NONRD);
2308 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, 0, subsize,
2309 PARTITION_VERT, &pc_tree->vertical[1], NULL);
2310 }
2311 break;
2312 case PARTITION_HORZ:
2313 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
2314 PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
2315 invalid_rd, PICK_MODE_NONRD);
2316 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, subsize,
2317 PARTITION_HORZ, &pc_tree->horizontal[0], NULL);
2318
2319 if (mi_row + hbs < mi_params->mi_rows && bsize > BLOCK_8X8) {
2320 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &dummy_cost,
2321 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
2322 invalid_rd, PICK_MODE_NONRD);
2323 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, 0, subsize,
2324 PARTITION_HORZ, &pc_tree->horizontal[1], NULL);
2325 }
2326 break;
2327 case PARTITION_SPLIT:
2328 if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
2329 is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
2330 !frame_is_intra_only(cm) && bsize <= BLOCK_32X32) {
2331 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2332 RD_STATS split_rdc, none_rdc;
2333 av1_invalid_rd_stats(&split_rdc);
2334 av1_invalid_rd_stats(&none_rdc);
2335 save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2336 xd->above_txfm_context =
2337 cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2338 xd->left_txfm_context =
2339 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2340 pc_tree->partitioning = PARTITION_NONE;
2341 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
2342 PARTITION_NONE, bsize, &pc_tree->none, invalid_rd,
2343 PICK_MODE_NONRD);
2344 none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
2345 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
2346 restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2347 if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode != 2 ||
2348 none_rdc.skip != 1 || pc_tree->none.mic.mode == NEWMV) {
2349 av1_init_rd_stats(&split_rdc);
2350 for (int i = 0; i < 4; i++) {
2351 RD_STATS block_rdc;
2352 av1_invalid_rd_stats(&block_rdc);
2353 int x_idx = (i & 1) * hbs;
2354 int y_idx = (i >> 1) * hbs;
2355 if ((mi_row + y_idx >= mi_params->mi_rows) ||
2356 (mi_col + x_idx >= mi_params->mi_cols))
2357 continue;
2358 xd->above_txfm_context =
2359 cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
2360 xd->left_txfm_context = xd->left_txfm_context_buffer +
2361 ((mi_row + y_idx) & MAX_MIB_MASK);
2362 pc_tree->split[i]->partitioning = PARTITION_NONE;
2363 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2364 &block_rdc, PARTITION_NONE, subsize,
2365 &pc_tree->split[i]->none, invalid_rd,
2366 PICK_MODE_NONRD);
2367 split_rdc.rate += block_rdc.rate;
2368 split_rdc.dist += block_rdc.dist;
2369
2370 encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
2371 subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2372 }
2373 restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
2374 split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
2375 split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
2376 }
2377 if (none_rdc.rdcost < split_rdc.rdcost) {
2378 mib[0]->sb_type = bsize;
2379 pc_tree->partitioning = PARTITION_NONE;
2380 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize, partition,
2381 &pc_tree->none, NULL);
2382 } else {
2383 mib[0]->sb_type = subsize;
2384 pc_tree->partitioning = PARTITION_SPLIT;
2385 for (int i = 0; i < 4; i++) {
2386 int x_idx = (i & 1) * hbs;
2387 int y_idx = (i >> 1) * hbs;
2388 if ((mi_row + y_idx >= mi_params->mi_rows) ||
2389 (mi_col + x_idx >= mi_params->mi_cols))
2390 continue;
2391
2392 encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 0,
2393 subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
2394 }
2395 }
2396 } else {
2397 for (int i = 0; i < 4; i++) {
2398 int x_idx = (i & 1) * hbs;
2399 int y_idx = (i >> 1) * hbs;
2400 int jj = i >> 1, ii = i & 0x01;
2401 if ((mi_row + y_idx >= mi_params->mi_rows) ||
2402 (mi_col + x_idx >= mi_params->mi_cols))
2403 continue;
2404 nonrd_use_partition(cpi, td, tile_data,
2405 mib + jj * hbs * mi_params->mi_stride + ii * hbs,
2406 tp, mi_row + y_idx, mi_col + x_idx, subsize,
2407 pc_tree->split[i]);
2408 }
2409 }
2410 break;
2411 case PARTITION_VERT_A:
2412 case PARTITION_VERT_B:
2413 case PARTITION_HORZ_A:
2414 case PARTITION_HORZ_B:
2415 case PARTITION_HORZ_4:
2416 case PARTITION_VERT_4:
2417 assert(0 && "Cannot handle extended partition types");
2418 default: assert(0); break;
2419 }
2420 }
2421
2422 #if !CONFIG_REALTIME_ONLY
2423 static const FIRSTPASS_STATS *read_one_frame_stats(const TWO_PASS *p, int frm) {
2424 assert(frm >= 0);
2425 if (frm < 0 ||
2426 p->stats_buf_ctx->stats_in_start + frm > p->stats_buf_ctx->stats_in_end) {
2427 return NULL;
2428 }
2429
2430 return &p->stats_buf_ctx->stats_in_start[frm];
2431 }
2432 // Checks to see if a super block is on a horizontal image edge.
2433 // In most cases this is the "real" edge unless there are formatting
2434 // bars embedded in the stream.
2435 static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
2436 int top_edge = 0;
2437 int bottom_edge = cpi->common.mi_params.mi_rows;
2438 int is_active_h_edge = 0;
2439
2440 // For two-pass encoding, account for any formatting bars detected.
2441 if (is_stat_consumption_stage_twopass(cpi)) {
2442 const AV1_COMMON *const cm = &cpi->common;
2443 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
2444 &cpi->twopass, cm->current_frame.display_order_hint);
2445 if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
2446
2447 // The inactive region is specified in MBs not mi units.
2448 // The image edge is in the following MB row.
2449 top_edge += (int)(this_frame_stats->inactive_zone_rows * 4);
2450
2451 bottom_edge -= (int)(this_frame_stats->inactive_zone_rows * 4);
2452 bottom_edge = AOMMAX(top_edge, bottom_edge);
2453 }
2454
2455 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2456 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2457 is_active_h_edge = 1;
2458 }
2459 return is_active_h_edge;
2460 }
2461
2462 // Checks to see if a super block is on a vertical image edge.
2463 // In most cases this is the "real" edge unless there are formatting
2464 // bars embedded in the stream.
2465 static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
2466 int left_edge = 0;
2467 int right_edge = cpi->common.mi_params.mi_cols;
2468 int is_active_v_edge = 0;
2469
2470 // For two-pass encoding, account for any formatting bars detected.
2471 if (is_stat_consumption_stage_twopass(cpi)) {
2472 const AV1_COMMON *const cm = &cpi->common;
2473 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
2474 &cpi->twopass, cm->current_frame.display_order_hint);
2475 if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
2476
2477 // The inactive region is specified in MBs not mi units.
2478 // The image edge is in the following MB column.
2479 left_edge += (int)(this_frame_stats->inactive_zone_cols * 4);
2480
2481 right_edge -= (int)(this_frame_stats->inactive_zone_cols * 4);
2482 right_edge = AOMMAX(left_edge, right_edge);
2483 }
2484
2485 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2486 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2487 is_active_v_edge = 1;
2488 }
2489 return is_active_v_edge;
2490 }
2491 #endif // !CONFIG_REALTIME_ONLY
2492
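// Stash/recover the predicted MVs so that adaptive motion search can seed a
// block's search from a previously evaluated context (see rd_try_subblock).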
2493 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2494 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2495 }
2496
2497 static INLINE void load_pred_mv(MACROBLOCK *x,
2498 const PICK_MODE_CONTEXT *const ctx) {
2499 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2500 }
2501
2502 #if !CONFIG_REALTIME_ONLY
2503 // Try searching for an encoding for the given subblock. Returns zero if the
2504 // rd cost is already too high (to tell the caller not to bother searching
2505 // for encodings of further subblocks).
2506 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2507 TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
2508 int mi_row, int mi_col, BLOCK_SIZE subsize,
2509 RD_STATS best_rdcost, RD_STATS *sum_rdc,
2510 PARTITION_TYPE partition,
2511 PICK_MODE_CONTEXT *prev_ctx,
2512 PICK_MODE_CONTEXT *this_ctx) {
2513 MACROBLOCK *const x = &td->mb;
2514 const int orig_mult = x->rdmult;
2515 setup_block_rdmult(cpi, x, mi_row, mi_col, subsize, NO_AQ, NULL);
2516
2517 av1_rd_cost_update(x->rdmult, &best_rdcost);
2518 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
2519
2520 RD_STATS rdcost_remaining;
2521 av1_rd_stats_subtraction(x->rdmult, &best_rdcost, sum_rdc, &rdcost_remaining);
2522 RD_STATS this_rdc;
2523 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, partition,
2524 subsize, this_ctx, rdcost_remaining, PICK_MODE_RD);
2525
2526 if (this_rdc.rate == INT_MAX) {
2527 sum_rdc->rdcost = INT64_MAX;
2528 } else {
2529 sum_rdc->rate += this_rdc.rate;
2530 sum_rdc->dist += this_rdc.dist;
2531 av1_rd_cost_update(x->rdmult, sum_rdc);
2532 }
2533
2534 if (sum_rdc->rdcost >= best_rdcost.rdcost) {
2535 x->rdmult = orig_mult;
2536 return 0;
2537 }
2538
2539 if (!is_last) {
2540 update_state(cpi, td, this_ctx, mi_row, mi_col, subsize, 1);
2541 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
2542 }
2543
2544 x->rdmult = orig_mult;
2545 return 1;
2546 }
2547
2548 static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
2549 TileDataEnc *tile_data, TOKENEXTRA **tp,
2550 PC_TREE *pc_tree, RD_STATS *best_rdc,
2551 PICK_MODE_CONTEXT ctxs[3],
2552 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
2553 BLOCK_SIZE bsize, PARTITION_TYPE partition,
2554 int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
2555 int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
2556 int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
2557 const MACROBLOCK *const x = &td->mb;
2558 const MACROBLOCKD *const xd = &x->e_mbd;
2559 const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2560 RD_STATS sum_rdc;
2561 av1_init_rd_stats(&sum_rdc);
2562 sum_rdc.rate = x->partition_cost[pl][partition];
2563 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
2564 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
2565 *best_rdc, &sum_rdc, partition, ctx, &ctxs[0]))
2566 return false;
2567
2568 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
2569 *best_rdc, &sum_rdc, partition, &ctxs[0], &ctxs[1]))
2570 return false;
2571
2572 if (!rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
2573 *best_rdc, &sum_rdc, partition, &ctxs[1], &ctxs[2]))
2574 return false;
2575
2576 av1_rd_cost_update(x->rdmult, &sum_rdc);
2577 if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2578 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2579 if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
2580
2581 *best_rdc = sum_rdc;
2582 pc_tree->partitioning = partition;
2583 return true;
2584 }
2585
2586 static AOM_INLINE void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
2587 pc_tree->partitioning = PARTITION_NONE;
2588 pc_tree->none.rd_stats.skip = 0;
2589
2590 if (bsize >= BLOCK_8X8) {
2591 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2592 for (int idx = 0; idx < 4; ++idx)
2593 reset_partition(pc_tree->split[idx], subsize);
2594 }
2595 }
2596
2597 // Record the ref frames that have been selected by square partition blocks.
2598 static AOM_INLINE void update_picked_ref_frames_mask(MACROBLOCK *const x,
2599 int ref_type,
2600 BLOCK_SIZE bsize,
2601 int mib_size, int mi_row,
2602 int mi_col) {
2603 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2604 const int sb_size_mask = mib_size - 1;
2605 const int mi_row_in_sb = mi_row & sb_size_mask;
2606 const int mi_col_in_sb = mi_col & sb_size_mask;
2607 const int mi_size = mi_size_wide[bsize];
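// picked_ref_frames_mask is laid out on a 32x32 grid of mi (4x4) units,
// enough to cover the largest (128x128) superblock.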
2608 for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
2609 for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
2610 x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
2611 }
2612 }
2613 }
2614
2615 // Structure to keep win flags for HORZ and VERT partition evaluations
2616 typedef struct {
2617 bool horz_win;
2618 bool vert_win;
2619 } RD_RECT_PART_WIN_INFO;
2620
2621 // Decide whether to evaluate the AB partition associated with rect_part,
2622 // based on the results of the split and HORZ/VERT evaluations.
2623 int evaluate_ab_partition_based_on_split(
2624 PC_TREE *pc_tree, PARTITION_TYPE rect_part,
2625 RD_RECT_PART_WIN_INFO *rect_part_win_info, int qindex, int split_idx1,
2626 int split_idx2) {
2627 int num_win = 0;
2628 // Threshold for the number of winners needed to evaluate the AB partition.
2629 // Pruning is more conservative at high quantizers.
2630 const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3);
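// With integer arithmetic num_win_thresh is 3 when qindex is at most
// MAXQ / 2 and 0 above it, so AB partitions are always evaluated at high
// quantizers and are otherwise pruned unless all three sub-blocks vote
// for them.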
2631 bool sub_part_win = (rect_part_win_info == NULL)
2632 ? (pc_tree->partitioning == rect_part)
2633 : (rect_part == PARTITION_HORZ)
2634 ? rect_part_win_info->horz_win
2635 : rect_part_win_info->vert_win;
2636 num_win += (sub_part_win) ? 1 : 0;
2637 num_win +=
2638 (pc_tree->split[split_idx1]->partitioning == PARTITION_NONE) ? 1 : 0;
2639 num_win +=
2640 (pc_tree->split[split_idx2]->partitioning == PARTITION_NONE) ? 1 : 0;
2641 if (num_win < num_win_thresh) {
2642 return 0;
2643 }
2644 return 1;
2645 }
2646
2647 // Searches for the best partition pattern for a block based on the
2648 // rate-distortion cost, and returns a bool value to indicate whether a valid
2649 // partition pattern is found. The partition can recursively go down to
2650 // the smallest block size.
2651 //
2652 // Inputs:
2653 // cpi: the global compressor setting
2654 // td: thread data
2655 // tile_data: tile data
2656 // tp: the pointer to the start token
2657 // mi_row: row coordinate of the block in a step size of MI_SIZE
2658 // mi_col: column coordinate of the block in a step size of MI_SIZE
2659 // bsize: block size
2660 // max_sq_part: the largest square block size for prediction blocks
2661 // min_sq_part: the smallest square block size for prediction blocks
2662 // rd_cost: the pointer to the final rd cost of the current block
2663 // best_rdc: the upper bound of rd cost for a valid partition
2664 // pc_tree: the pointer to the PC_TREE node storing the picked partitions
2665 // and mode info for the current block
2666 // none_rd: the pointer to the rd cost in the case of not splitting the
2667 // current block
2668 // multi_pass_mode: SB_SINGLE_PASS/SB_DRY_PASS/SB_WET_PASS
2669 // rect_part_win_info: the pointer to a struct storing whether horz/vert
2670 // partition outperforms previously tested partitions
2671 //
2672 // Output:
2673 // a bool value indicating whether a valid partition is found
2674 static bool rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
2675 TileDataEnc *tile_data, TOKENEXTRA **tp,
2676 int mi_row, int mi_col, BLOCK_SIZE bsize,
2677 BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
2678 RD_STATS *rd_cost, RD_STATS best_rdc,
2679 PC_TREE *pc_tree, int64_t *none_rd,
2680 SB_MULTI_PASS_MODE multi_pass_mode,
2681 RD_RECT_PART_WIN_INFO *rect_part_win_info) {
2682 const AV1_COMMON *const cm = &cpi->common;
2683 const CommonModeInfoParams *const mi_params = &cm->mi_params;
2684 const int num_planes = av1_num_planes(cm);
2685 TileInfo *const tile_info = &tile_data->tile_info;
2686 MACROBLOCK *const x = &td->mb;
2687 MACROBLOCKD *const xd = &x->e_mbd;
2688 const int mi_step = mi_size_wide[bsize] / 2;
2689 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2690 const TOKENEXTRA *const tp_orig = *tp;
2691 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
2692 int tmp_partition_cost[PARTITION_TYPES];
2693 BLOCK_SIZE subsize;
2694 RD_STATS this_rdc, sum_rdc;
2695 const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
2696 int do_square_split = bsize_at_least_8x8;
2697 const int pl = bsize_at_least_8x8
2698 ? partition_plane_context(xd, mi_row, mi_col, bsize)
2699 : 0;
2700 const int *partition_cost = x->partition_cost[pl];
2701
2702 int do_rectangular_split = cpi->oxcf.enable_rect_partitions;
2703 int64_t cur_none_rd = 0;
2704 int64_t split_rd[4] = { 0, 0, 0, 0 };
2705 int64_t horz_rd[2] = { 0, 0 };
2706 int64_t vert_rd[2] = { 0, 0 };
2707 int prune_horz = 0;
2708 int prune_vert = 0;
2709 int terminate_partition_search = 0;
2710
2711 int split_ctx_is_ready[2] = { 0, 0 };
2712 int horz_ctx_is_ready = 0;
2713 int vert_ctx_is_ready = 0;
2714 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
2715 // Initialise HORZ and VERT win flags as true for all split partitions
2716 RD_RECT_PART_WIN_INFO split_part_rect_win[4] = {
2717 { true, true }, { true, true }, { true, true }, { true, true }
2718 };
2719
2720 bool found_best_partition = false;
2721 if (best_rdc.rdcost < 0) {
2722 av1_invalid_rd_stats(rd_cost);
2723 return found_best_partition;
2724 }
2725
2726 if (frame_is_intra_only(cm) && bsize == BLOCK_64X64) {
2727 x->quad_tree_idx = 0;
2728 x->cnn_output_valid = 0;
2729 }
2730
2731 if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
2732
2733 // Override skipping rectangular partition operations for edge blocks
2734 const int has_rows = (mi_row + mi_step < mi_params->mi_rows);
2735 const int has_cols = (mi_col + mi_step < mi_params->mi_cols);
2736 const int xss = x->e_mbd.plane[1].subsampling_x;
2737 const int yss = x->e_mbd.plane[1].subsampling_y;
2738
2739 if (none_rd) *none_rd = 0;
2740 int partition_none_allowed = has_rows && has_cols;
2741 int partition_horz_allowed =
2742 has_cols && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2743 get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), xss,
2744 yss) != BLOCK_INVALID;
2745 int partition_vert_allowed =
2746 has_rows && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2747 get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), xss,
2748 yss) != BLOCK_INVALID;
2749
2750 (void)*tp_orig;
2751
2752 #if CONFIG_COLLECT_PARTITION_STATS
2753 int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
2754 int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
2755 int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
2756 struct aom_usec_timer partition_timer = { 0 };
2757 int partition_timer_on = 0;
2758 #if CONFIG_COLLECT_PARTITION_STATS == 2
2759 PartitionStats *part_stats = &cpi->partition_stats;
2760 #endif
2761 #endif
2762
2763 // Override partition costs at the edges of the frame in the same
2764 // way as in read_partition (see decodeframe.c)
2765 if (!(has_rows && has_cols)) {
2766 assert(bsize_at_least_8x8 && pl >= 0);
2767 const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
2768 const int max_cost = av1_cost_symbol(0);
2769 for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = max_cost;
2770 if (has_cols) {
2771 // At the bottom, the two possibilities are HORZ and SPLIT
2772 aom_cdf_prob bot_cdf[2];
2773 partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
2774 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
2775 av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
2776 } else if (has_rows) {
2777 // At the right, the two possibilities are VERT and SPLIT
2778 aom_cdf_prob rhs_cdf[2];
2779 partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
2780 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
2781 av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
2782 } else {
2783 // At the bottom right, we always split
2784 tmp_partition_cost[PARTITION_SPLIT] = 0;
2785 }
2786
2787 partition_cost = tmp_partition_cost;
2788 }
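// A concrete reading (for the bottom edge): only HORZ and SPLIT are codable
// there, so the full partition CDF is collapsed into a binary CDF, and every
// remaining type keeps max_cost (the cost of a near-zero-probability
// symbol), which effectively removes those types from the search below.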
2789
2790 #ifndef NDEBUG
2791 // Nothing should rely on the default value of this array, which is just
2792 // whatever was left over from encoding the previous block. Set it to a
2793 // fixed pattern (0x77 = 0b01110111) when debugging:
2794 // bits 0, 1, 2 are the blk_skip flags of each plane,
2795 // bits 4, 5, 6 are the initialization-check flags of each plane.
2796 memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
2797 #endif // NDEBUG
2798
2799 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2800
2801 av1_init_rd_stats(&this_rdc);
2802
2803 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2804
2805 // Save rdmult before it might be changed, so it can be restored later.
2806 const int orig_rdmult = x->rdmult;
2807 setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
2808
2809 av1_rd_cost_update(x->rdmult, &best_rdc);
2810
2811 if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
2812 x->mb_energy = av1_log_block_var(cpi, x, bsize);
2813
2814 if (bsize > cpi->sf.part_sf.use_square_partition_only_threshold) {
2815 partition_horz_allowed &= !has_rows;
2816 partition_vert_allowed &= !has_cols;
2817 }
2818
2819 xd->above_txfm_context =
2820 cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
2821 xd->left_txfm_context =
2822 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2823 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2824
2825 const int try_intra_cnn_split =
2826 !cpi->is_screen_content_type && frame_is_intra_only(cm) &&
2827 cpi->sf.part_sf.intra_cnn_split &&
2828 cm->seq_params.sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
2829 bsize >= BLOCK_8X8 &&
2830 mi_row + mi_size_high[bsize] <= mi_params->mi_rows &&
2831 mi_col + mi_size_wide[bsize] <= mi_params->mi_cols;
2832
2833 if (try_intra_cnn_split) {
2834 av1_intra_mode_cnn_partition(
2835 &cpi->common, x, bsize, x->quad_tree_idx, &partition_none_allowed,
2836 &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
2837 &do_square_split);
2838 }
2839
2840 // Use simple_motion_search to prune partitions. This must be done prior to
2841 // PARTITION_SPLIT to propagate the initial mvs to a smaller blocksize.
2842 const int try_split_only =
2843 !cpi->is_screen_content_type &&
2844 cpi->sf.part_sf.simple_motion_search_split && do_square_split &&
2845 bsize >= BLOCK_8X8 &&
2846 mi_row + mi_size_high[bsize] <= mi_params->mi_rows &&
2847 mi_col + mi_size_wide[bsize] <= mi_params->mi_cols &&
2848 !frame_is_intra_only(cm) && !av1_superres_scaled(cm);
2849
2850 if (try_split_only) {
2851 av1_simple_motion_search_based_split(
2852 cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
2853 &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
2854 &do_square_split);
2855 }
2856
2857 const int try_prune_rect =
2858 !cpi->is_screen_content_type &&
2859 cpi->sf.part_sf.simple_motion_search_prune_rect &&
2860 !frame_is_intra_only(cm) && do_rectangular_split &&
2861 (do_square_split || partition_none_allowed ||
2862 (prune_horz && prune_vert)) &&
2863 (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
2864
2865 if (try_prune_rect) {
2866 av1_simple_motion_search_prune_rect(
2867 cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_horz_allowed,
2868 &partition_vert_allowed, &prune_horz, &prune_vert);
2869 }
2870
2871 // Max and min square partition levels are defined as the partition nodes that
2872 // the recursive function rd_pick_partition() can reach. To implement this:
2873 // only PARTITION_NONE is allowed if the current node equals min_sq_part,
2874 // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part.
2875 assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]);
2876 assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]);
2877 assert(min_sq_part <= max_sq_part);
2878 assert(block_size_wide[bsize] == block_size_high[bsize]);
2879 const int max_partition_size = block_size_wide[max_sq_part];
2880 const int min_partition_size = block_size_wide[min_sq_part];
2881 const int blksize = block_size_wide[bsize];
2882 assert(min_partition_size <= max_partition_size);
2883 const int is_le_min_sq_part = blksize <= min_partition_size;
2884 const int is_gt_max_sq_part = blksize > max_partition_size;
2885 if (is_gt_max_sq_part) {
2886 // If current block size is larger than max, only allow split.
2887 partition_none_allowed = 0;
2888 partition_horz_allowed = 0;
2889 partition_vert_allowed = 0;
2890 do_square_split = 1;
2891 } else if (is_le_min_sq_part) {
2892 // If the current block size is at most the minimum, allow PARTITION_NONE
2893 // only when the block lies fully inside the frame; otherwise only allow split.
2894 partition_horz_allowed = 0;
2895 partition_vert_allowed = 0;
2896 // Only disable square split when the current block lies fully inside the
2897 // picture; otherwise, inherit the square split flag from the logic above.
2898 if (has_rows && has_cols) do_square_split = 0;
2899 partition_none_allowed = !do_square_split;
2900 }
2901
2902 BEGIN_PARTITION_SEARCH:
2903 if (x->must_find_valid_partition) {
2904 do_square_split = bsize_at_least_8x8 && (blksize > min_partition_size);
2905 partition_none_allowed =
2906 has_rows && has_cols && (blksize >= min_partition_size);
2907 partition_horz_allowed =
2908 has_cols && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2909 (blksize > min_partition_size) &&
2910 get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ), xss,
2911 yss) != BLOCK_INVALID;
2912 partition_vert_allowed =
2913 has_rows && bsize_at_least_8x8 && cpi->oxcf.enable_rect_partitions &&
2914 (blksize > min_partition_size) &&
2915 get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT), xss,
2916 yss) != BLOCK_INVALID;
2917 terminate_partition_search = 0;
2918 }
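// Note: x->must_find_valid_partition is set near the end of this function
// when no legal partition was found for the superblock; the search then
// restarts from the BEGIN_PARTITION_SEARCH label above with the pruning
// relaxed, so that at least one partition type is guaranteed to be
// searchable.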
2919
2920 // Partition block source pixel variance.
2921 unsigned int pb_source_variance = UINT_MAX;
2922
2923 // SSE of the partition block after simple motion compensation; not used
2924 // yet, but reserved for upcoming speed features.
2925 unsigned int pb_simple_motion_pred_sse = UINT_MAX;
2926 (void)pb_simple_motion_pred_sse;
2927
2928 // PARTITION_NONE
2929 if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
2930 assert(terminate_partition_search == 0);
2931 int64_t part_none_rd = INT64_MAX;
2932 if (cpi->is_screen_content_type)
2933 partition_none_allowed = has_rows && has_cols;
2934 if (partition_none_allowed && !is_gt_max_sq_part) {
2935 int pt_cost = 0;
2936 if (bsize_at_least_8x8) {
2937 pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
2938 ? partition_cost[PARTITION_NONE]
2939 : 0;
2940 }
2941 RD_STATS partition_rdcost;
2942 av1_init_rd_stats(&partition_rdcost);
2943 partition_rdcost.rate = pt_cost;
2944 av1_rd_cost_update(x->rdmult, &partition_rdcost);
2945 RD_STATS best_remain_rdcost;
2946 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &partition_rdcost,
2947 &best_remain_rdcost);
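// best_remain_rdcost is, roughly, the RD budget left for this candidate:
// best_rdc minus the cost already committed to signaling PARTITION_NONE.
// pick_sb_modes() may use it to terminate the mode search early once a
// partial sum can no longer beat the current best.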
2948 #if CONFIG_COLLECT_PARTITION_STATS
2949 if (best_remain_rdcost.rdcost >= 0) {
2950 partition_attempts[PARTITION_NONE] += 1;
2951 aom_usec_timer_start(&partition_timer);
2952 partition_timer_on = 1;
2953 }
2954 #endif
2955 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
2956 bsize, ctx_none, best_remain_rdcost, PICK_MODE_RD);
2957 av1_rd_cost_update(x->rdmult, &this_rdc);
2958 #if CONFIG_COLLECT_PARTITION_STATS
2959 if (partition_timer_on) {
2960 aom_usec_timer_mark(&partition_timer);
2961 int64_t time = aom_usec_timer_elapsed(&partition_timer);
2962 partition_times[PARTITION_NONE] += time;
2963 partition_timer_on = 0;
2964 }
2965 #endif
2966 pb_source_variance = x->source_variance;
2967 pb_simple_motion_pred_sse = x->simple_motion_pred_sse;
2968 if (none_rd) *none_rd = this_rdc.rdcost;
2969 cur_none_rd = this_rdc.rdcost;
2970 if (this_rdc.rate != INT_MAX) {
2971 if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) {
2972 const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
2973 update_picked_ref_frames_mask(x, ref_type, bsize,
2974 cm->seq_params.mib_size, mi_row, mi_col);
2975 }
2976 if (bsize_at_least_8x8) {
2977 this_rdc.rate += pt_cost;
2978 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2979 }
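// A note on RDCOST(): it folds rate and distortion into one comparable
// cost, roughly (rate * rdmult) >> AV1_PROB_COST_SHIFT plus
// (dist << RDDIV_BITS) (see av1/encoder/rd.h), so the cost is re-derived
// here after adding the partition signaling rate to keep the comparison
// with best_rdc consistent.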
2980
2981 part_none_rd = this_rdc.rdcost;
2982 if (this_rdc.rdcost < best_rdc.rdcost) {
2983 // Adjust dist breakout threshold according to the partition size.
2984 const int64_t dist_breakout_thr =
2985 cpi->sf.part_sf.partition_search_breakout_dist_thr >>
2986 ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
2987 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
2988 const int rate_breakout_thr =
2989 cpi->sf.part_sf.partition_search_breakout_rate_thr *
2990 num_pels_log2_lookup[bsize];
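// The shift scales the distortion threshold with block area: assuming
// MAX_SB_SIZE_LOG2 == 7 (128x128 superblocks), a 128x128 block uses the
// threshold unshifted, a 64x64 block uses it >> 2, a 32x32 block >> 4, and
// so on, i.e. roughly proportional to the pixel count. The rate threshold
// scales similarly via num_pels_log2_lookup[bsize].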
2991
2992 best_rdc = this_rdc;
2993 found_best_partition = true;
2994 if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
2995
2996 if (!frame_is_intra_only(cm) &&
2997 (do_square_split || do_rectangular_split) &&
2998 !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
2999 const int use_ml_based_breakout =
3000 bsize <= cpi->sf.part_sf.use_square_partition_only_threshold &&
3001 bsize > BLOCK_4X4 && xd->bd == 8;
3002 if (use_ml_based_breakout) {
3003 if (av1_ml_predict_breakout(cpi, bsize, x, &this_rdc,
3004 pb_source_variance)) {
3005 do_square_split = 0;
3006 do_rectangular_split = 0;
3007 }
3008 }
3009
3010 // If all y, u, v transform blocks in this partition are skippable,
3011 // and the dist & rate are within the thresholds, the partition
3012 // search is terminated for the current branch of the partition search
3013 // tree. The dist & rate thresholds are set to 0 at speed 0 to
3014 // disable the early termination at that speed.
3015 if (best_rdc.dist < dist_breakout_thr &&
3016 best_rdc.rate < rate_breakout_thr) {
3017 do_square_split = 0;
3018 do_rectangular_split = 0;
3019 }
3020 }
3021
3022 if (cpi->sf.part_sf.simple_motion_search_early_term_none &&
3023 cm->show_frame && !frame_is_intra_only(cm) &&
3024 bsize >= BLOCK_16X16 && mi_row + mi_step < mi_params->mi_rows &&
3025 mi_col + mi_step < mi_params->mi_cols &&
3026 this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
3027 this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
3028 (do_square_split || do_rectangular_split)) {
3029 av1_simple_motion_search_early_term_none(cpi, x, pc_tree, mi_row,
3030 mi_col, bsize, &this_rdc,
3031 &terminate_partition_search);
3032 }
3033 }
3034 }
3035
3036 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3037 }
3038
3039 // Store the estimated motion vector.
3040 if (cpi->sf.mv_sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
3041
3042 // PARTITION_SPLIT
3043 int64_t part_split_rd = INT64_MAX;
3044 if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
3045 av1_init_rd_stats(&sum_rdc);
3046 subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3047 sum_rdc.rate = partition_cost[PARTITION_SPLIT];
3048 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3049
3050 int idx;
3051 #if CONFIG_COLLECT_PARTITION_STATS
3052 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3053 partition_attempts[PARTITION_SPLIT] += 1;
3054 aom_usec_timer_start(&partition_timer);
3055 partition_timer_on = 1;
3056 }
3057 #endif
3058 for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
3059 const int x_idx = (idx & 1) * mi_step;
3060 const int y_idx = (idx >> 1) * mi_step;
3061
3062 if (mi_row + y_idx >= mi_params->mi_rows ||
3063 mi_col + x_idx >= mi_params->mi_cols)
3064 continue;
3065
3066 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3067
3068 pc_tree->split[idx]->index = idx;
3069 int64_t *p_split_rd = &split_rd[idx];
3070
3071 RD_STATS best_remain_rdcost;
3072 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3073 &best_remain_rdcost);
3074
3075 int curr_quad_tree_idx = 0;
3076 if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3077 curr_quad_tree_idx = x->quad_tree_idx;
3078 x->quad_tree_idx = 4 * curr_quad_tree_idx + idx + 1;
3079 }
3080 if (!rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3081 mi_col + x_idx, subsize, max_sq_part, min_sq_part,
3082 &this_rdc, best_remain_rdcost, pc_tree->split[idx],
3083 p_split_rd, multi_pass_mode,
3084 &split_part_rect_win[idx])) {
3085 av1_invalid_rd_stats(&sum_rdc);
3086 break;
3087 }
3088 if (frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
3089 x->quad_tree_idx = curr_quad_tree_idx;
3090 }
3091
3092 sum_rdc.rate += this_rdc.rate;
3093 sum_rdc.dist += this_rdc.dist;
3094 av1_rd_cost_update(x->rdmult, &sum_rdc);
3095 if (idx <= 1 && (bsize <= BLOCK_8X8 ||
3096 pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
3097 const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
3098 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3099 // Neither palette mode nor cfl predicted
3100 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3101 if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
3102 }
3103 }
3104 }
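// split_ctx_is_ready[0..1] records whether the first two split sub-blocks
// ended up as plain PARTITION_NONE with neither palette nor CfL prediction;
// if so, their mode contexts can be copied verbatim when evaluating
// PARTITION_HORZ_A / PARTITION_VERT_A further below.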
3105 #if CONFIG_COLLECT_PARTITION_STATS
3106 if (partition_timer_on) {
3107 aom_usec_timer_mark(&partition_timer);
3108 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3109 partition_times[PARTITION_SPLIT] += time;
3110 partition_timer_on = 0;
3111 }
3112 #endif
3113 const int reached_last_index = (idx == 4);
3114
3115 part_split_rd = sum_rdc.rdcost;
3116 if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
3117 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3118 if (sum_rdc.rdcost < best_rdc.rdcost) {
3119 best_rdc = sum_rdc;
3120 found_best_partition = true;
3121 pc_tree->partitioning = PARTITION_SPLIT;
3122 }
3123 } else if (cpi->sf.part_sf.less_rectangular_check_level > 0) {
3124 // Skip rectangular partition test when partition type none gives better
3125 // rd than partition type split.
3126 if (cpi->sf.part_sf.less_rectangular_check_level == 2 || idx <= 2) {
3127 const int partition_none_valid = cur_none_rd > 0;
3128 const int partition_none_better = cur_none_rd < sum_rdc.rdcost;
3129 do_rectangular_split &=
3130 !(partition_none_valid && partition_none_better);
3131 }
3132 }
3133
3134 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3135 } // if (do_split)
3136
3137 if (cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3138 !frame_is_intra_only(cm) && !terminate_partition_search &&
3139 do_rectangular_split &&
3140 (partition_horz_allowed || partition_vert_allowed)) {
3141 av1_ml_early_term_after_split(cpi, x, pc_tree, bsize, best_rdc.rdcost,
3142 part_none_rd, part_split_rd, split_rd, mi_row,
3143 mi_col, &terminate_partition_search);
3144 }
3145
3146 if (!cpi->sf.part_sf.ml_early_term_after_part_split_level &&
3147 cpi->sf.part_sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
3148 (partition_horz_allowed || partition_vert_allowed) &&
3149 !(prune_horz || prune_vert) && !terminate_partition_search) {
3150 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3151 av1_ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
3152 split_rd, &prune_horz, &prune_vert);
3153 }
3154
3155 // PARTITION_HORZ
3156 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed));
3157 if (!terminate_partition_search && partition_horz_allowed && !prune_horz &&
3158 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3159 !is_gt_max_sq_part) {
3160 av1_init_rd_stats(&sum_rdc);
3161 subsize = get_partition_subsize(bsize, PARTITION_HORZ);
3162 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3163 sum_rdc.rate = partition_cost[PARTITION_HORZ];
3164 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3165 RD_STATS best_remain_rdcost;
3166 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3167 &best_remain_rdcost);
3168 #if CONFIG_COLLECT_PARTITION_STATS
3169 if (best_remain_rdcost.rdcost >= 0) {
3170 partition_attempts[PARTITION_HORZ] += 1;
3171 aom_usec_timer_start(&partition_timer);
3172 partition_timer_on = 1;
3173 }
3174 #endif
3175 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
3176 subsize, &pc_tree->horizontal[0], best_remain_rdcost,
3177 PICK_MODE_RD);
3178 av1_rd_cost_update(x->rdmult, &this_rdc);
3179
3180 if (this_rdc.rate == INT_MAX) {
3181 sum_rdc.rdcost = INT64_MAX;
3182 } else {
3183 sum_rdc.rate += this_rdc.rate;
3184 sum_rdc.dist += this_rdc.dist;
3185 av1_rd_cost_update(x->rdmult, &sum_rdc);
3186 }
3187 horz_rd[0] = this_rdc.rdcost;
3188
3189 if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
3190 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
3191 const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
3192 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3193 // Neither palette mode nor cfl predicted
3194 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3195 if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
3196 }
3197 update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1);
3198 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
3199
3200 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
3201
3202 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3203 &best_remain_rdcost);
3204
3205 pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
3206 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
3207 best_remain_rdcost, PICK_MODE_RD);
3208 av1_rd_cost_update(x->rdmult, &this_rdc);
3209 horz_rd[1] = this_rdc.rdcost;
3210
3211 if (this_rdc.rate == INT_MAX) {
3212 sum_rdc.rdcost = INT64_MAX;
3213 } else {
3214 sum_rdc.rate += this_rdc.rate;
3215 sum_rdc.dist += this_rdc.dist;
3216 av1_rd_cost_update(x->rdmult, &sum_rdc);
3217 }
3218 }
3219 #if CONFIG_COLLECT_PARTITION_STATS
3220 if (partition_timer_on) {
3221 aom_usec_timer_mark(&partition_timer);
3222 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3223 partition_times[PARTITION_HORZ] += time;
3224 partition_timer_on = 0;
3225 }
3226 #endif
3227
3228 if (sum_rdc.rdcost < best_rdc.rdcost) {
3229 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3230 if (sum_rdc.rdcost < best_rdc.rdcost) {
3231 best_rdc = sum_rdc;
3232 found_best_partition = true;
3233 pc_tree->partitioning = PARTITION_HORZ;
3234 }
3235 } else {
3236 // Update HORZ win flag
3237 if (rect_part_win_info != NULL) {
3238 rect_part_win_info->horz_win = false;
3239 }
3240 }
3241
3242 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3243 }
3244
3245 // PARTITION_VERT
3246 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed));
3247 if (!terminate_partition_search && partition_vert_allowed && !prune_vert &&
3248 (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
3249 !is_gt_max_sq_part) {
3250 av1_init_rd_stats(&sum_rdc);
3251 subsize = get_partition_subsize(bsize, PARTITION_VERT);
3252
3253 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3254
3255 sum_rdc.rate = partition_cost[PARTITION_VERT];
3256 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3257 RD_STATS best_remain_rdcost;
3258 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3259 &best_remain_rdcost);
3260 #if CONFIG_COLLECT_PARTITION_STATS
3261 if (best_remain_rdcost.rdcost >= 0) {
3262 partition_attempts[PARTITION_VERT] += 1;
3263 aom_usec_timer_start(&partition_timer);
3264 partition_timer_on = 1;
3265 }
3266 #endif
3267 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
3268 subsize, &pc_tree->vertical[0], best_remain_rdcost,
3269 PICK_MODE_RD);
3270 av1_rd_cost_update(x->rdmult, &this_rdc);
3271
3272 if (this_rdc.rate == INT_MAX) {
3273 sum_rdc.rdcost = INT64_MAX;
3274 } else {
3275 sum_rdc.rate += this_rdc.rate;
3276 sum_rdc.dist += this_rdc.dist;
3277 av1_rd_cost_update(x->rdmult, &sum_rdc);
3278 }
3279 vert_rd[0] = this_rdc.rdcost;
3280 if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) {
3281 const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
3282 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3283 // Neither palette mode nor cfl predicted
3284 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3285 if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
3286 }
3287 update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1);
3288 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, subsize, NULL);
3289
3290 if (cpi->sf.mv_sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3291
3292 av1_rd_stats_subtraction(x->rdmult, &best_rdc, &sum_rdc,
3293 &best_remain_rdcost);
3294 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
3295 PARTITION_VERT, subsize, &pc_tree->vertical[1],
3296 best_remain_rdcost, PICK_MODE_RD);
3297 av1_rd_cost_update(x->rdmult, &this_rdc);
3298 vert_rd[1] = this_rdc.rdcost;
3299
3300 if (this_rdc.rate == INT_MAX) {
3301 sum_rdc.rdcost = INT64_MAX;
3302 } else {
3303 sum_rdc.rate += this_rdc.rate;
3304 sum_rdc.dist += this_rdc.dist;
3305 av1_rd_cost_update(x->rdmult, &sum_rdc);
3306 }
3307 }
3308 #if CONFIG_COLLECT_PARTITION_STATS
3309 if (partition_timer_on) {
3310 aom_usec_timer_mark(&partition_timer);
3311 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3312 partition_times[PARTITION_VERT] += time;
3313 partition_timer_on = 0;
3314 }
3315 #endif
3316
3317 av1_rd_cost_update(x->rdmult, &sum_rdc);
3318 if (sum_rdc.rdcost < best_rdc.rdcost) {
3319 best_rdc = sum_rdc;
3320 found_best_partition = true;
3321 pc_tree->partitioning = PARTITION_VERT;
3322 } else {
3323 // Update VERT win flag
3324 if (rect_part_win_info != NULL) {
3325 rect_part_win_info->vert_win = false;
3326 }
3327 }
3328
3329 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3330 }
3331
3332 if (pb_source_variance == UINT_MAX) {
3333 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3334 if (is_cur_buf_hbd(xd)) {
3335 pb_source_variance = av1_high_get_sby_perpixel_variance(
3336 cpi, &x->plane[0].src, bsize, xd->bd);
3337 } else {
3338 pb_source_variance =
3339 av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3340 }
3341 }
3342
3343 if (use_pb_simple_motion_pred_sse(cpi) &&
3344 pb_simple_motion_pred_sse == UINT_MAX) {
3345 const FULLPEL_MV start_mv = kZeroFullMv;
3346 unsigned int var = 0;
3347
3348 av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, start_mv, 0,
3349 &pb_simple_motion_pred_sse, &var);
3350 }
3351
3352 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !do_rectangular_split));
3353
3354 const int ext_partition_allowed =
3355 do_rectangular_split &&
3356 bsize > cpi->sf.part_sf.ext_partition_eval_thresh && has_rows && has_cols;
3357
3358 // The standard AB partitions are allowed whenever ext-partition-types are
3359 // allowed
3360 int horzab_partition_allowed =
3361 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3362 int vertab_partition_allowed =
3363 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3364
3365 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3366 if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 1) {
3367 // TODO(debargha,huisu@google.com): may need to tune the threshold for
3368 // pb_source_variance.
3369 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3370 (pc_tree->partitioning == PARTITION_NONE &&
3371 pb_source_variance < 32) ||
3372 pc_tree->partitioning == PARTITION_SPLIT);
3373 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3374 (pc_tree->partitioning == PARTITION_NONE &&
3375 pb_source_variance < 32) ||
3376 pc_tree->partitioning == PARTITION_SPLIT);
3377 } else {
3378 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3379 pc_tree->partitioning == PARTITION_SPLIT);
3380 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3381 pc_tree->partitioning == PARTITION_SPLIT);
3382 }
3383 horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
3384 horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
3385 vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
3386 vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
3387 split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
3388 split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
3389 split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
3390 split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
3391 }
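// Any RD cost left at INT64_MAX above was never actually evaluated; it is
// replaced by 0 so the AB-partition estimates summed below cannot overflow,
// with an unevaluated term simply contributing nothing.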
3392 int horza_partition_allowed = horzab_partition_allowed;
3393 int horzb_partition_allowed = horzab_partition_allowed;
3394 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3395 const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
3396 const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
3397 switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3398 case 1:
3399 horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
3400 horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
3401 break;
3402 case 2:
3403 default:
3404 horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
3405 horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
3406 break;
3407 }
3408 }
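// The 14/16 (or 15/16) factor adds slack to the pruning: HORZ_A is tried
// only if about 87.5% (or 93.75%) of its estimated cost, one HORZ half plus
// two split quarters, still beats the current best, so the estimate must
// look clearly promising before the full search is spent on it.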
3409
3410 int verta_partition_allowed = vertab_partition_allowed;
3411 int vertb_partition_allowed = vertab_partition_allowed;
3412 if (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3413 const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
3414 const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
3415 switch (cpi->sf.part_sf.prune_ext_partition_types_search_level) {
3416 case 1:
3417 verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
3418 vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
3419 break;
3420 case 2:
3421 default:
3422 verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
3423 vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
3424 break;
3425 }
3426 }
3427
3428 if (cpi->sf.part_sf.ml_prune_ab_partition && ext_partition_allowed &&
3429 partition_horz_allowed && partition_vert_allowed) {
3430 // TODO(huisu@google.com): x->source_variance may not be the current
3431 // block's variance. The correct one to use is pb_source_variance. Need to
3432 // re-train the model to fix it.
3433 av1_ml_prune_ab_partition(
3434 bsize, pc_tree->partitioning, get_unsigned_bits(x->source_variance),
3435 best_rdc.rdcost, horz_rd, vert_rd, split_rd, &horza_partition_allowed,
3436 &horzb_partition_allowed, &verta_partition_allowed,
3437 &vertb_partition_allowed);
3438 }
3439
3440 horza_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3441 horzb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3442 verta_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3443 vertb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3444
3445 if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3446 horza_partition_allowed) {
3447 horza_partition_allowed &= evaluate_ab_partition_based_on_split(
3448 pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 0, 1);
3449 }
3450
3451 // PARTITION_HORZ_A
3452 if (!terminate_partition_search && partition_horz_allowed &&
3453 horza_partition_allowed && !is_gt_max_sq_part) {
3454 subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
3455 pc_tree->horizontala[0].rd_mode_is_ready = 0;
3456 pc_tree->horizontala[1].rd_mode_is_ready = 0;
3457 pc_tree->horizontala[2].rd_mode_is_ready = 0;
3458 if (split_ctx_is_ready[0]) {
3459 av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
3460 pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
3461 pc_tree->horizontala[0].rd_mode_is_ready = 1;
3462 if (split_ctx_is_ready[1]) {
3463 av1_copy_tree_context(&pc_tree->horizontala[1],
3464 &pc_tree->split[1]->none);
3465 pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
3466 pc_tree->horizontala[1].rd_mode_is_ready = 1;
3467 }
3468 }
3469 #if CONFIG_COLLECT_PARTITION_STATS
3470 {
3471 RD_STATS tmp_sum_rdc;
3472 av1_init_rd_stats(&tmp_sum_rdc);
3473 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
3474 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3475 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3476 partition_attempts[PARTITION_HORZ_A] += 1;
3477 aom_usec_timer_start(&partition_timer);
3478 partition_timer_on = 1;
3479 }
3480 }
3481 #endif
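// Sub-block geometry for PARTITION_HORZ_A, as passed below: two square
// bsize2 blocks side by side in the top half, then one full-width subsize
// block in the bottom half.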
3482 found_best_partition |= rd_test_partition3(
3483 cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontala,
3484 ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_A, mi_row, mi_col,
3485 bsize2, mi_row, mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
3486 subsize);
3487 #if CONFIG_COLLECT_PARTITION_STATS
3488 if (partition_timer_on) {
3489 aom_usec_timer_mark(&partition_timer);
3490 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3491 partition_times[PARTITION_HORZ_A] += time;
3492 partition_timer_on = 0;
3493 }
3494 #endif
3495 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3496 }
3497
3498 if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3499 horzb_partition_allowed) {
3500 horzb_partition_allowed &= evaluate_ab_partition_based_on_split(
3501 pc_tree, PARTITION_HORZ, rect_part_win_info, x->qindex, 2, 3);
3502 }
3503
3504 // PARTITION_HORZ_B
3505 if (!terminate_partition_search && partition_horz_allowed &&
3506 horzb_partition_allowed && !is_gt_max_sq_part) {
3507 subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
3508 pc_tree->horizontalb[0].rd_mode_is_ready = 0;
3509 pc_tree->horizontalb[1].rd_mode_is_ready = 0;
3510 pc_tree->horizontalb[2].rd_mode_is_ready = 0;
3511 if (horz_ctx_is_ready) {
3512 av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
3513 pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
3514 pc_tree->horizontalb[0].rd_mode_is_ready = 1;
3515 }
3516 #if CONFIG_COLLECT_PARTITION_STATS
3517 {
3518 RD_STATS tmp_sum_rdc;
3519 av1_init_rd_stats(&tmp_sum_rdc);
3520 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
3521 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3522 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3523 partition_attempts[PARTITION_HORZ_B] += 1;
3524 aom_usec_timer_start(&partition_timer);
3525 partition_timer_on = 1;
3526 }
3527 }
3528 #endif
3529 found_best_partition |= rd_test_partition3(
3530 cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->horizontalb,
3531 ctx_none, mi_row, mi_col, bsize, PARTITION_HORZ_B, mi_row, mi_col,
3532 subsize, mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
3533 mi_col + mi_step, bsize2);
3534
3535 #if CONFIG_COLLECT_PARTITION_STATS
3536 if (partition_timer_on) {
3537 aom_usec_timer_mark(&partition_timer);
3538 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3539 partition_times[PARTITION_HORZ_B] += time;
3540 partition_timer_on = 0;
3541 }
3542 #endif
3543 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3544 }
3545
3546 if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3547 verta_partition_allowed) {
3548 verta_partition_allowed &= evaluate_ab_partition_based_on_split(
3549 pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 0, 2);
3550 }
3551
3552 // PARTITION_VERT_A
3553 if (!terminate_partition_search && partition_vert_allowed &&
3554 verta_partition_allowed && !is_gt_max_sq_part) {
3555 subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
3556 pc_tree->verticala[0].rd_mode_is_ready = 0;
3557 pc_tree->verticala[1].rd_mode_is_ready = 0;
3558 pc_tree->verticala[2].rd_mode_is_ready = 0;
3559 if (split_ctx_is_ready[0]) {
3560 av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
3561 pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
3562 pc_tree->verticala[0].rd_mode_is_ready = 1;
3563 }
3564 #if CONFIG_COLLECT_PARTITION_STATS
3565 {
3566 RD_STATS tmp_sum_rdc;
3567 av1_init_rd_stats(&tmp_sum_rdc);
3568 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
3569 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3570 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3571 partition_attempts[PARTITION_VERT_A] += 1;
3572 aom_usec_timer_start(&partition_timer);
3573 partition_timer_on = 1;
3574 }
3575 }
3576 #endif
3577 found_best_partition |= rd_test_partition3(
3578 cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticala,
3579 ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_A, mi_row, mi_col,
3580 bsize2, mi_row + mi_step, mi_col, bsize2, mi_row, mi_col + mi_step,
3581 subsize);
3582 #if CONFIG_COLLECT_PARTITION_STATS
3583 if (partition_timer_on) {
3584 aom_usec_timer_mark(&partition_timer);
3585 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3586 partition_times[PARTITION_VERT_A] += time;
3587 partition_timer_on = 0;
3588 }
3589 #endif
3590 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3591 }
3592
3593 if (cpi->sf.part_sf.prune_ab_partition_using_split_info &&
3594 vertb_partition_allowed) {
3595 vertb_partition_allowed &= evaluate_ab_partition_based_on_split(
3596 pc_tree, PARTITION_VERT, rect_part_win_info, x->qindex, 1, 3);
3597 }
3598
3599 // PARTITION_VERT_B
3600 if (!terminate_partition_search && partition_vert_allowed &&
3601 vertb_partition_allowed && !is_gt_max_sq_part) {
3602 subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
3603 pc_tree->verticalb[0].rd_mode_is_ready = 0;
3604 pc_tree->verticalb[1].rd_mode_is_ready = 0;
3605 pc_tree->verticalb[2].rd_mode_is_ready = 0;
3606 if (vert_ctx_is_ready) {
3607 av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
3608 pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
3609 pc_tree->verticalb[0].rd_mode_is_ready = 1;
3610 }
3611 #if CONFIG_COLLECT_PARTITION_STATS
3612 {
3613 RD_STATS tmp_sum_rdc;
3614 av1_init_rd_stats(&tmp_sum_rdc);
3615 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
3616 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3617 if (!frame_is_intra_only(cm) &&
3618 best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3619 partition_attempts[PARTITION_VERT_B] += 1;
3620 aom_usec_timer_start(&partition_timer);
3621 partition_timer_on = 1;
3622 }
3623 }
3624 #endif
3625 found_best_partition |= rd_test_partition3(
3626 cpi, td, tile_data, tp, pc_tree, &best_rdc, pc_tree->verticalb,
3627 ctx_none, mi_row, mi_col, bsize, PARTITION_VERT_B, mi_row, mi_col,
3628 subsize, mi_row, mi_col + mi_step, bsize2, mi_row + mi_step,
3629 mi_col + mi_step, bsize2);
3630 #if CONFIG_COLLECT_PARTITION_STATS
3631 if (partition_timer_on) {
3632 aom_usec_timer_mark(&partition_timer);
3633 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3634 partition_times[PARTITION_VERT_B] += time;
3635 partition_timer_on = 0;
3636 }
3637 #endif
3638 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3639 }
3640
3641 // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
3642 // PARTITION_VERT_4 for this block. This is almost the same as
3643 // ext_partition_allowed, except that we don't allow 128x32 or 32x128
3644 // blocks, so we require that bsize is not BLOCK_128X128.
3645 const int partition4_allowed = cpi->oxcf.enable_1to4_partitions &&
3646 ext_partition_allowed &&
3647 bsize != BLOCK_128X128;
3648
3649 int partition_horz4_allowed =
3650 partition4_allowed && partition_horz_allowed &&
3651 get_plane_block_size(get_partition_subsize(bsize, PARTITION_HORZ_4), xss,
3652 yss) != BLOCK_INVALID;
3653 int partition_vert4_allowed =
3654 partition4_allowed && partition_vert_allowed &&
3655 get_plane_block_size(get_partition_subsize(bsize, PARTITION_VERT_4), xss,
3656 yss) != BLOCK_INVALID;
3657 if (cpi->sf.part_sf.prune_ext_partition_types_search_level == 2) {
3658 partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3659 pc_tree->partitioning == PARTITION_HORZ_A ||
3660 pc_tree->partitioning == PARTITION_HORZ_B ||
3661 pc_tree->partitioning == PARTITION_SPLIT ||
3662 pc_tree->partitioning == PARTITION_NONE);
3663 partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3664 pc_tree->partitioning == PARTITION_VERT_A ||
3665 pc_tree->partitioning == PARTITION_VERT_B ||
3666 pc_tree->partitioning == PARTITION_SPLIT ||
3667 pc_tree->partitioning == PARTITION_NONE);
3668 }
3669 if (cpi->sf.part_sf.ml_prune_4_partition && partition4_allowed &&
3670 partition_horz_allowed && partition_vert_allowed) {
3671 av1_ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning,
3672 best_rdc.rdcost, horz_rd, vert_rd, split_rd,
3673 &partition_horz4_allowed, &partition_vert4_allowed,
3674 pb_source_variance, mi_row, mi_col);
3675 }
3676
3677 if (blksize < (min_partition_size << 2)) {
3678 partition_horz4_allowed = 0;
3679 partition_vert4_allowed = 0;
3680 }
3681
3682 if (cpi->sf.part_sf.prune_4_partition_using_split_info &&
3683 (partition_horz4_allowed || partition_vert4_allowed)) {
3684 // Count of child blocks in which HORZ or VERT partition has won
3685 int num_child_horz_win = 0, num_child_vert_win = 0;
3686 for (int idx = 0; idx < 4; idx++) {
3687 num_child_horz_win += (split_part_rect_win[idx].horz_win) ? 1 : 0;
3688 num_child_vert_win += (split_part_rect_win[idx].vert_win) ? 1 : 0;
3689 }
3690
3691 // Prune HORZ4/VERT4 partitions based on the number of HORZ/VERT winners
3692 // among the split partitions.
3693 // Pruning is conservative at high quantizers.
3694 const int num_win_thresh = AOMMIN(3 * (MAXQ - x->qindex) / MAXQ + 1, 3);
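// For example, assuming MAXQ == 255: at low qindex the threshold is 3 (keep
// HORZ4/VERT4 only if at least 3 of the 4 children chose the rectangular
// shape), relaxing to 1 near qindex == MAXQ, i.e. less pruning at high
// quantizers.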
3695 if (num_child_horz_win < num_win_thresh) {
3696 partition_horz4_allowed = 0;
3697 }
3698 if (num_child_vert_win < num_win_thresh) {
3699 partition_vert4_allowed = 0;
3700 }
3701 }
3702
3703 // PARTITION_HORZ_4
3704 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
3705 if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
3706 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3707 !is_gt_max_sq_part) {
3708 av1_init_rd_stats(&sum_rdc);
3709 const int quarter_step = mi_size_high[bsize] / 4;
3710 PICK_MODE_CONTEXT *ctx_prev = ctx_none;
3711
3712 subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
3713 sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
3714 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3715
3716 #if CONFIG_COLLECT_PARTITION_STATS
3717 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3718 partition_attempts[PARTITION_HORZ_4] += 1;
3719 aom_usec_timer_start(&partition_timer);
3720 partition_timer_on = 1;
3721 }
3722 #endif
3723 for (int i = 0; i < 4; ++i) {
3724 const int this_mi_row = mi_row + i * quarter_step;
3725
3726 if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
3727
3728 PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
3729
3730 ctx_this->rd_mode_is_ready = 0;
3731 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
3732 mi_col, subsize, best_rdc, &sum_rdc,
3733 PARTITION_HORZ_4, ctx_prev, ctx_this)) {
3734 av1_invalid_rd_stats(&sum_rdc);
3735 break;
3736 }
3737
3738 ctx_prev = ctx_this;
3739 }
3740
3741 av1_rd_cost_update(x->rdmult, &sum_rdc);
3742 if (sum_rdc.rdcost < best_rdc.rdcost) {
3743 best_rdc = sum_rdc;
3744 found_best_partition = true;
3745 pc_tree->partitioning = PARTITION_HORZ_4;
3746 }
3747
3748 #if CONFIG_COLLECT_PARTITION_STATS
3749 if (partition_timer_on) {
3750 aom_usec_timer_mark(&partition_timer);
3751 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3752 partition_times[PARTITION_HORZ_4] += time;
3753 partition_timer_on = 0;
3754 }
3755 #endif
3756 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3757 }
3758
3759 // PARTITION_VERT_4
3760 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
3761 if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
3762 (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
3763 !is_gt_max_sq_part) {
3764 av1_init_rd_stats(&sum_rdc);
3765 const int quarter_step = mi_size_wide[bsize] / 4;
3766 PICK_MODE_CONTEXT *ctx_prev = ctx_none;
3767
3768 subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
3769 sum_rdc.rate = partition_cost[PARTITION_VERT_4];
3770 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3771
3772 #if CONFIG_COLLECT_PARTITION_STATS
3773 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3774 partition_attempts[PARTITION_VERT_4] += 1;
3775 aom_usec_timer_start(&partition_timer);
3776 partition_timer_on = 1;
3777 }
3778 #endif
3779 for (int i = 0; i < 4; ++i) {
3780 const int this_mi_col = mi_col + i * quarter_step;
3781
3782 if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
3783
3784 PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
3785
3786 ctx_this->rd_mode_is_ready = 0;
3787 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
3788 this_mi_col, subsize, best_rdc, &sum_rdc,
3789 PARTITION_VERT_4, ctx_prev, ctx_this)) {
3790 av1_invalid_rd_stats(&sum_rdc);
3791 break;
3792 }
3793
3794 ctx_prev = ctx_this;
3795 }
3796
3797 av1_rd_cost_update(x->rdmult, &sum_rdc);
3798 if (sum_rdc.rdcost < best_rdc.rdcost) {
3799 best_rdc = sum_rdc;
3800 found_best_partition = true;
3801 pc_tree->partitioning = PARTITION_VERT_4;
3802 }
3803 #if CONFIG_COLLECT_PARTITION_STATS
3804 if (partition_timer_on) {
3805 aom_usec_timer_mark(&partition_timer);
3806 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3807 partition_times[PARTITION_VERT_4] += time;
3808 partition_timer_on = 0;
3809 }
3810 #endif
3811 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3812 }
3813
3814 if (bsize == cm->seq_params.sb_size && !found_best_partition) {
3815 // Did not find a valid partition; go back and search again, with fewer
3816 // constraints on which partition types to search.
3817 x->must_find_valid_partition = 1;
3818 #if CONFIG_COLLECT_PARTITION_STATS == 2
3819 part_stats->partition_redo += 1;
3820 #endif
3821 goto BEGIN_PARTITION_SEARCH;
3822 }
3823
3824 *rd_cost = best_rdc;
3825
3826 #if CONFIG_COLLECT_PARTITION_STATS
3827 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
3828 partition_decisions[pc_tree->partitioning] += 1;
3829 }
3830 #endif
3831
3832 #if CONFIG_COLLECT_PARTITION_STATS == 1
3833 // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
3834 // prediction block
3835 FILE *f = fopen("data.csv", "a");
3836 fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
3837 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3838 fprintf(f, "%d,", partition_decisions[idx]);
3839 }
3840 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3841 fprintf(f, "%d,", partition_attempts[idx]);
3842 }
3843 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3844 fprintf(f, "%lld,", (long long)partition_times[idx]);  // int64_t-safe
3845 }
3846 fprintf(f, "\n");
3847 fclose(f);
3848 #endif
3849
3850 #if CONFIG_COLLECT_PARTITION_STATS == 2
3851 // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for
3852 // the whole clip, so we need to pass the information upstream to the encoder.
3853 const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
3854 int *agg_attempts = part_stats->partition_attempts[bsize_idx];
3855 int *agg_decisions = part_stats->partition_decisions[bsize_idx];
3856 int64_t *agg_times = part_stats->partition_times[bsize_idx];
3857 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
3858 agg_attempts[idx] += partition_attempts[idx];
3859 agg_decisions[idx] += partition_decisions[idx];
3860 agg_times[idx] += partition_times[idx];
3861 }
3862 #endif
3863
3864 if (found_best_partition && pc_tree->index != 3) {
3865 if (bsize == cm->seq_params.sb_size) {
3866 const int emit_output = multi_pass_mode != SB_DRY_PASS;
3867 const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL;
3868
3869 x->cb_offset = 0;
3870 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
3871 pc_tree, NULL);
3872 } else {
3873 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
3874 pc_tree, NULL);
3875 }
3876 }
3877
3878 if (bsize == cm->seq_params.sb_size) {
3879 assert(best_rdc.rate < INT_MAX);
3880 assert(best_rdc.dist < INT64_MAX);
3881 } else {
3882 assert(tp_orig == *tp);
3883 }
3884
3885 x->rdmult = orig_rdmult;
3886 return found_best_partition;
3887 }
3888 #endif // !CONFIG_REALTIME_ONLY
3889 #undef NUM_SIMPLE_MOTION_FEATURES
3890
3891 #if !CONFIG_REALTIME_ONLY
3892
3893 static int get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int analysis_type,
3894 int mi_row, int mi_col, int orig_rdmult) {
3895 AV1_COMMON *const cm = &cpi->common;
3896 assert(IMPLIES(cpi->gf_group.size > 0,
3897 cpi->gf_group.index < cpi->gf_group.size));
3898 const int tpl_idx = cpi->gf_group.index;
3899 TplParams *const tpl_data = &cpi->tpl_data;
3900 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
3901 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
3902 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
3903 int tpl_stride = tpl_frame->stride;
3904 int64_t intra_cost = 0;
3905 int64_t mc_dep_cost = 0;
3906 const int mi_wide = mi_size_wide[bsize];
3907 const int mi_high = mi_size_high[bsize];
3908
3909 if (tpl_frame->is_valid == 0) return orig_rdmult;
3910
3911 if (!is_frame_tpl_eligible(cpi)) return orig_rdmult;
3912
3913 if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
3914
3915 int64_t mc_count = 0, mc_saved = 0;
3916 int mi_count = 0;
3917 const int mi_col_sr =
3918 coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
3919 const int mi_col_end_sr =
3920 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
3921 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
3922 const int step = 1 << block_mis_log2;
3923 for (int row = mi_row; row < mi_row + mi_high; row += step) {
3924 for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
3925 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
3926 TplDepStats *this_stats =
3927 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
3928 int64_t mc_dep_delta =
3929 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
3930 this_stats->mc_dep_dist);
3931 intra_cost += this_stats->recrf_dist << RDDIV_BITS;
3932 mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
3933 mc_count += this_stats->mc_count;
3934 mc_saved += this_stats->mc_saved;
3935 mi_count++;
3936 }
3937 }
3938
3939 aom_clear_system_state();
3940
3941 double beta = 1.0;
3942 if (analysis_type == 0) {
3943 if (mc_dep_cost > 0 && intra_cost > 0) {
3944 const double r0 = cpi->rd.r0;
3945 const double rk = (double)intra_cost / mc_dep_cost;
3946 beta = (r0 / rk);
3947 }
3948 } else if (analysis_type == 1) {
3949 const double mc_count_base = (mi_count * cpi->rd.mc_count_base);
3950 beta = (mc_count + 1.0) / (mc_count_base + 1.0);
3951 beta = pow(beta, 0.5);
3952 } else if (analysis_type == 2) {
3953 const double mc_saved_base = (mi_count * cpi->rd.mc_saved_base);
3954 beta = (mc_saved + 1.0) / (mc_saved_base + 1.0);
3955 beta = pow(beta, 0.5);
3956 }
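// A rough interpretation of analysis_type 0: rk measures how much of this
// block's cost is paid back by later frames (mc_dep_cost grows with
// downstream dependency), so beta = r0 / rk > 1 marks a block that is more
// important than the frame average; av1_get_adaptive_rdmult() is then
// expected to lower rdmult so the block receives more bits.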
3957
3958 int rdmult = av1_get_adaptive_rdmult(cpi, beta);
3959
3960 aom_clear_system_state();
3961
3962 rdmult = AOMMIN(rdmult, orig_rdmult * 3 / 2);
3963 rdmult = AOMMAX(rdmult, orig_rdmult * 1 / 2);
3964
3965 rdmult = AOMMAX(1, rdmult);
3966
3967 return rdmult;
3968 }
3969
3970 static int get_tpl_stats_b(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
3971 int mi_col, int64_t *intra_cost_b,
3972 int64_t *inter_cost_b,
3973 int_mv mv_b[][INTER_REFS_PER_FRAME], int *stride) {
3974 if (!cpi->oxcf.enable_tpl_model) return 0;
3975 if (cpi->superres_mode != SUPERRES_NONE) return 0;
3976 if (cpi->common.current_frame.frame_type == KEY_FRAME) return 0;
3977 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
3978 if (update_type == INTNL_OVERLAY_UPDATE || update_type == OVERLAY_UPDATE)
3979 return 0;
3980 assert(IMPLIES(cpi->gf_group.size > 0,
3981 cpi->gf_group.index < cpi->gf_group.size));
3982
3983 AV1_COMMON *const cm = &cpi->common;
3984 const int gf_group_index = cpi->gf_group.index;
3985 TplParams *const tpl_data = &cpi->tpl_data;
3986 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_group_index];
3987 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
3988 int tpl_stride = tpl_frame->stride;
3989 const int mi_wide = mi_size_wide[bsize];
3990 const int mi_high = mi_size_high[bsize];
3991
3992 if (tpl_frame->is_valid == 0) return 0;
3993 if (gf_group_index >= MAX_LAG_BUFFERS) return 0;
3994
3995 int mi_count = 0;
3996 int count = 0;
3997 const int mi_col_sr =
3998 coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
3999 const int mi_col_end_sr =
4000 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
4001 // mi_cols_sr is mi_cols in the superres case.
4002 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
4003
4004 // The TPL stats storage unit size is not the same as the motion estimation
4005 // unit size. Always use the motion estimation size here to avoid fetching
4006 // duplicate inter/intra costs.
4007 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(MC_FLOW_BSIZE_1D);
4008 const int step = mi_size_wide[tpl_bsize];
4009 assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]);
4010
4011 // Stride is only based on SB size, and we fill in values for every 16x16
4012 // block in a SB.
4013 *stride = (mi_col_end_sr - mi_col_sr) / step;
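// For example, assuming MC_FLOW_BSIZE_1D == 16 (so step == 4 mi units), a
// 64x64 superblock without superres scaling gives *stride == 16 / 4 == 4
// entries per row of the output arrays.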
4014
4015 for (int row = mi_row; row < mi_row + mi_high; row += step) {
4016 for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
4017 // Handle partial SB, so that no invalid values are used later.
4018 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) {
4019 inter_cost_b[count] = INT64_MAX;
4020 intra_cost_b[count] = INT64_MAX;
4021 for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
4022 mv_b[count][i].as_int = INVALID_MV;
4023 }
4024 count++;
4025 continue;
4026 }
4027
4028 TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
4029 row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
4030 inter_cost_b[count] = this_stats->inter_cost;
4031 intra_cost_b[count] = this_stats->intra_cost;
4032 memcpy(mv_b[count], this_stats->mv, sizeof(this_stats->mv));
4033 mi_count++;
4034 count++;
4035 }
4036 }
4037
4038 return mi_count;
4039 }
4040
4041 // analysis_type 0: Use mc_dep_cost and intra_cost
4042 // analysis_type 1: Use count of best inter predictor chosen
4043 // analysis_type 2: Use cost reduction from intra to inter for best inter
4044 // predictor chosen
4045 static int get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
4046 int mi_row, int mi_col) {
4047 AV1_COMMON *const cm = &cpi->common;
4048 assert(IMPLIES(cpi->gf_group.size > 0,
4049 cpi->gf_group.index < cpi->gf_group.size));
4050 const int tpl_idx = cpi->gf_group.index;
4051 TplParams *const tpl_data = &cpi->tpl_data;
4052 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
4053 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4054 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
4055 int tpl_stride = tpl_frame->stride;
4056 int64_t intra_cost = 0;
4057 int64_t mc_dep_cost = 0;
4058 const int mi_wide = mi_size_wide[bsize];
4059 const int mi_high = mi_size_high[bsize];
4060 const int base_qindex = cm->quant_params.base_qindex;
4061
4062 if (tpl_frame->is_valid == 0) return base_qindex;
4063
4064 if (!is_frame_tpl_eligible(cpi)) return base_qindex;
4065
4066 if (cpi->gf_group.index >= MAX_LAG_BUFFERS) return base_qindex;
4067
4068 int64_t mc_count = 0, mc_saved = 0;
4069 int mi_count = 0;
4070 const int mi_col_sr =
4071 coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
4072 const int mi_col_end_sr =
4073 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
4074 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
4075 const int step = 1 << block_mis_log2;
4076 for (int row = mi_row; row < mi_row + mi_high; row += step) {
4077 for (int col = mi_col_sr; col < mi_col_end_sr; col += step) {
4078 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
4079 TplDepStats *this_stats =
4080 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
4081 int64_t mc_dep_delta =
4082 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
4083 this_stats->mc_dep_dist);
4084 intra_cost += this_stats->recrf_dist << RDDIV_BITS;
4085 mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
4086 mc_count += this_stats->mc_count;
4087 mc_saved += this_stats->mc_saved;
4088 mi_count++;
4089 }
4090 }
4091
4092 aom_clear_system_state();
4093
4094 int offset = 0;
4095 double beta = 1.0;
4096 if (mc_dep_cost > 0 && intra_cost > 0) {
4097 const double r0 = cpi->rd.r0;
4098 const double rk = (double)intra_cost / mc_dep_cost;
4099 beta = (r0 / rk);
4100 assert(beta > 0.0);
4101 }
4102 offset = av1_get_deltaq_offset(cpi, base_qindex, beta);
4103 aom_clear_system_state();
4104
4105 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4106 offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
4107 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
4108 int qindex = cm->quant_params.base_qindex + offset;
4109 qindex = AOMMIN(qindex, MAXQ);
4110 qindex = AOMMAX(qindex, MINQ);
4111
4112 return qindex;
4113 }
4114
4115 static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
4116 MACROBLOCK *const x,
4117 const TileInfo *const tile_info,
4118 int mi_row, int mi_col, int num_planes) {
4119 AV1_COMMON *const cm = &cpi->common;
4120 const CommonModeInfoParams *const mi_params = &cm->mi_params;
4121 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4122 assert(delta_q_info->delta_q_present_flag);
4123
4124 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4125 // Delta-q modulation based on variance
4126 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
4127
4128 int current_qindex = cm->quant_params.base_qindex;
4129 if (cpi->oxcf.deltaq_mode == DELTA_Q_PERCEPTUAL) {
4130 if (DELTA_Q_PERCEPTUAL_MODULATION == 1) {
4131 const int block_wavelet_energy_level =
4132 av1_block_wavelet_energy_level(cpi, x, sb_size);
4133 x->sb_energy_level = block_wavelet_energy_level;
4134 current_qindex = av1_compute_q_from_energy_level_deltaq_mode(
4135 cpi, block_wavelet_energy_level);
4136 } else {
4137 const int block_var_level = av1_log_block_var(cpi, x, sb_size);
4138 x->sb_energy_level = block_var_level;
4139 current_qindex =
4140 av1_compute_q_from_energy_level_deltaq_mode(cpi, block_var_level);
4141 }
4142 } else if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE &&
4143 cpi->oxcf.enable_tpl_model) {
4144 // Setup deltaq based on tpl stats
4145 current_qindex = get_q_for_deltaq_objective(cpi, sb_size, mi_row, mi_col);
4146 }
4147
4148 const int delta_q_res = delta_q_info->delta_q_res;
4149 // Right now delta-q AQ only works with the TPL model, so if TPL is
4150 // disabled we set current_qindex back to base_qindex.
4151 if (cpi->oxcf.enable_tpl_model && cpi->oxcf.deltaq_mode != NO_DELTA_Q) {
4152 current_qindex =
4153 clamp(current_qindex, delta_q_res, 256 - delta_q_info->delta_q_res);
4154 } else {
4155 current_qindex = cm->quant_params.base_qindex;
4156 }
4157
4158 MACROBLOCKD *const xd = &x->e_mbd;
4159 const int sign_deltaq_index =
4160 current_qindex - xd->current_qindex >= 0 ? 1 : -1;
4161 const int deltaq_deadzone = delta_q_res / 4;
4162 const int qmask = ~(delta_q_res - 1);
4163 int abs_deltaq_index = abs(current_qindex - xd->current_qindex);
4164 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
4165 current_qindex = xd->current_qindex + sign_deltaq_index * abs_deltaq_index;
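// Deadzone example: with delta_q_res == 4 the deadzone is 1 and qmask == ~3,
// so a requested change with |delta| <= 2 snaps to 0 (no delta-q is coded)
// while |delta| == 3 rounds up to 4, the next codable multiple of
// delta_q_res.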
4166 current_qindex = AOMMAX(current_qindex, MINQ + 1);
4167 assert(current_qindex > 0);
4168
4169 xd->delta_qindex = current_qindex - cm->quant_params.base_qindex;
4170 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4171 xd->mi[0]->current_qindex = current_qindex;
4172 av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
4173
4174 // keep track of any non-zero delta-q used
4175 td->deltaq_used |= (xd->delta_qindex != 0);
4176
4177 if (cpi->oxcf.deltalf_mode) {
4178 const int delta_lf_res = delta_q_info->delta_lf_res;
4179 const int lfmask = ~(delta_lf_res - 1);
4180 const int delta_lf_from_base =
4181 ((xd->delta_qindex / 2 + delta_lf_res / 2) & lfmask);
4182 const int8_t delta_lf =
4183 (int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
4184 const int frame_lf_count =
4185 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
4186 const int mib_size = cm->seq_params.mib_size;
4187
4188     // Pre-set the delta_lf for the loop filter. Note that this value is set
4189     // before mi is assigned to each block in the current superblock.
4190 for (int j = 0; j < AOMMIN(mib_size, mi_params->mi_rows - mi_row); j++) {
4191 for (int k = 0; k < AOMMIN(mib_size, mi_params->mi_cols - mi_col); k++) {
4192 const int grid_idx = get_mi_grid_idx(mi_params, mi_row + j, mi_col + k);
4193 mi_params->mi_grid_base[grid_idx]->delta_lf_from_base = delta_lf;
4194 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
4195 mi_params->mi_grid_base[grid_idx]->delta_lf[lf_id] = delta_lf;
4196 }
4197 }
4198 }
4199 }
4200 }
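// The block above snaps the superblock's qindex change to a multiple of
// delta_q_res with a small deadzone before it is signaled. A minimal
// standalone sketch of just that rounding step; the helper name and the
// example values are illustrative only:
static INLINE int round_deltaq_index_example(int prev_qindex,
                                             int target_qindex,
                                             int delta_q_res) {
  const int sign = target_qindex - prev_qindex >= 0 ? 1 : -1;
  const int deadzone = delta_q_res / 4;
  const int qmask = ~(delta_q_res - 1);  // assumes delta_q_res is a power of 2
  int abs_delta = abs(target_qindex - prev_qindex);
  // e.g. prev = 100, target = 109, delta_q_res = 8:
  // (9 + 2) & ~7 == 8, so the coded qindex becomes 100 + 8 = 108.
  abs_delta = (abs_delta + deadzone) & qmask;
  return prev_qindex + sign * abs_delta;
}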
4201 #endif // !CONFIG_REALTIME_ONLY
4202
4203 #define AVG_CDF_WEIGHT_LEFT 3
4204 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
4205
4206 static AOM_INLINE void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left,
4207 aom_cdf_prob *cdf_ptr_tr, int num_cdfs,
4208 int cdf_stride, int nsymbs, int wt_left,
4209 int wt_tr) {
4210 for (int i = 0; i < num_cdfs; i++) {
4211 for (int j = 0; j <= nsymbs; j++) {
4212 cdf_ptr_left[i * cdf_stride + j] =
4213 (aom_cdf_prob)(((int)cdf_ptr_left[i * cdf_stride + j] * wt_left +
4214 (int)cdf_ptr_tr[i * cdf_stride + j] * wt_tr +
4215 ((wt_left + wt_tr) / 2)) /
4216 (wt_left + wt_tr));
4217 assert(cdf_ptr_left[i * cdf_stride + j] >= 0 &&
4218 cdf_ptr_left[i * cdf_stride + j] < CDF_PROB_TOP);
4219 }
4220 }
4221 }
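// A worked example of the rounding in avg_cdf_symbol(), with the weights
// used below (wt_left = 3, wt_tr = 1) and hypothetical CDF entries:
//   left = 9001, top-right = 5000
//   (9001 * 3 + 5000 * 1 + (3 + 1) / 2) / (3 + 1) = 32005 / 4 = 8001
// i.e. the "+ (wt_left + wt_tr) / 2" term makes the integer division round
// to nearest (the exact average is 8000.75) instead of truncating to 8000.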
4222
4223 #define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
4224 AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))
4225
4226 #define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride) \
4227 do { \
4228 aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left; \
4229 aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr; \
4230 int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob); \
4231 int num_cdfs = array_size / cdf_stride; \
4232 avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \
4233 wt_left, wt_tr); \
4234 } while (0)
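// Note that AVERAGE_CDF and AVG_CDF_STRIDE refer to wt_left and wt_tr by
// name rather than taking them as macro parameters, so every call site must
// have identically named weights in scope, as avg_nmv() and avg_cdf_symbols()
// below do. A minimal sketch of a valid call site (the function name is
// illustrative only):
//
//   static void average_skip_cdfs_example(FRAME_CONTEXT *left,
//                                         FRAME_CONTEXT *tr) {
//     const int wt_left = AVG_CDF_WEIGHT_LEFT;     // 3
//     const int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;  // 1
//     AVERAGE_CDF(left->skip_cdfs, tr->skip_cdfs, 2);
//   }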
4235
4236 static AOM_INLINE void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr,
4237 int wt_left, int wt_tr) {
4238 AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);
4239 for (int i = 0; i < 2; i++) {
4240 AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf,
4241 MV_CLASSES);
4242 AVERAGE_CDF(nmv_left->comps[i].class0_fp_cdf,
4243 nmv_tr->comps[i].class0_fp_cdf, MV_FP_SIZE);
4244 AVERAGE_CDF(nmv_left->comps[i].fp_cdf, nmv_tr->comps[i].fp_cdf, MV_FP_SIZE);
4245 AVERAGE_CDF(nmv_left->comps[i].sign_cdf, nmv_tr->comps[i].sign_cdf, 2);
4246 AVERAGE_CDF(nmv_left->comps[i].class0_hp_cdf,
4247 nmv_tr->comps[i].class0_hp_cdf, 2);
4248 AVERAGE_CDF(nmv_left->comps[i].hp_cdf, nmv_tr->comps[i].hp_cdf, 2);
4249 AVERAGE_CDF(nmv_left->comps[i].class0_cdf, nmv_tr->comps[i].class0_cdf,
4250 CLASS0_SIZE);
4251 AVERAGE_CDF(nmv_left->comps[i].bits_cdf, nmv_tr->comps[i].bits_cdf, 2);
4252 }
4253 }
4254
4255 // In case of row-based multi-threading of the encoder, since we always
4256 // keep a top-right sync, we can average the top-right SB's CDFs and the
4257 // left SB's CDFs and use the result for the current SB's encoding to
4258 // improve performance. This function facilitates the averaging of CDFs
4259 // and is used only when row-mt is enabled in the encoder.
4260 static AOM_INLINE void avg_cdf_symbols(FRAME_CONTEXT *ctx_left,
4261 FRAME_CONTEXT *ctx_tr, int wt_left,
4262 int wt_tr) {
4263 AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
4264 AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
4265 AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
4266 AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
4267 AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
4268 AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
4269 AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
4270 AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
4271 AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
4272 AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
4273 AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
4274 AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
4275 AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE);
4276 AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2);
4277 AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2);
4278 AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2);
4279 AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2);
4280 AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
4281 ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
4282 AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf,
4283 MASKED_COMPOUND_TYPES);
4284 AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16);
4285 AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2);
4286 AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2);
4287 AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf,
4288 INTERINTRA_MODES);
4289 AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES);
4290 AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2);
4291 AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf,
4292 PALETTE_SIZES);
4293 AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf,
4294 PALETTE_SIZES);
4295 for (int j = 0; j < PALETTE_SIZES; j++) {
4296 int nsymbs = j + PALETTE_MIN_SIZE;
4297 AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j],
4298 ctx_tr->palette_y_color_index_cdf[j], nsymbs,
4299 CDF_SIZE(PALETTE_COLORS));
4300 AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j],
4301 ctx_tr->palette_uv_color_index_cdf[j], nsymbs,
4302 CDF_SIZE(PALETTE_COLORS));
4303 }
4304 AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2);
4305 AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2);
4306 AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2);
4307 AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2);
4308 AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2);
4309 AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2);
4310 AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2);
4311 AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2);
4312 AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2);
4313 AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2);
4314 AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
4315 AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
4316 AVERAGE_CDF(ctx_left->skip_cdfs, ctx_tr->skip_cdfs, 2);
4317 AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
4318 avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
4319 avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
4320 AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
4321 AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS);
4322 AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
4323 AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
4324 ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
4325 AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2);
4326 AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf,
4327 FILTER_INTRA_MODES);
4328 AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf,
4329 RESTORE_SWITCHABLE_TYPES);
4330 AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2);
4331 AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2);
4332 AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES);
4333 AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
4334 UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
4335 AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
4336 for (int i = 0; i < PARTITION_CONTEXTS; i++) {
4337 if (i < 4) {
4338 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4,
4339 CDF_SIZE(10));
4340 } else if (i < 16) {
4341 AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10);
4342 } else {
4343 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8,
4344 CDF_SIZE(10));
4345 }
4346 }
4347 AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf,
4348 SWITCHABLE_FILTERS);
4349 AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES);
4350 AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf,
4351 2 * MAX_ANGLE_DELTA + 1);
4352 AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH,
4353 CDF_SIZE(MAX_TX_DEPTH + 1));
4354 AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1],
4355 MAX_TX_DEPTH + 1);
4356 AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2],
4357 MAX_TX_DEPTH + 1);
4358 AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3],
4359 MAX_TX_DEPTH + 1);
4360 AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1);
4361 AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1);
4362 for (int i = 0; i < FRAME_LF_COUNT; i++) {
4363 AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i],
4364 DELTA_LF_PROBS + 1);
4365 }
4366 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7,
4367 CDF_SIZE(TX_TYPES));
4368 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5,
4369 CDF_SIZE(TX_TYPES));
4370 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
4371 CDF_SIZE(TX_TYPES));
4372 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
4373 CDF_SIZE(TX_TYPES));
4374 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2,
4375 CDF_SIZE(TX_TYPES));
4376 AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS);
4377 AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf,
4378 CFL_ALPHABET_SIZE);
4379 }
4380
4381 #if !CONFIG_REALTIME_ONLY
4382 static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
4383 int mi_row, int mi_col) {
4384 const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
4385 const int orig_rdmult = cpi->rd.RDMULT;
4386
4387 assert(IMPLIES(cpi->gf_group.size > 0,
4388 cpi->gf_group.index < cpi->gf_group.size));
4389 const int gf_group_index = cpi->gf_group.index;
4390 if (cpi->oxcf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ &&
4391 cpi->oxcf.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
4392 cpi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
4393 const int dr =
4394 get_rdmult_delta(cpi, sb_size, 0, mi_row, mi_col, orig_rdmult);
4395 x->rdmult = dr;
4396 }
4397 }
4398 #endif
4399
4400 static void source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int shift) {
4401 unsigned int tmp_sse;
4402 unsigned int tmp_variance;
4403 const BLOCK_SIZE bsize = BLOCK_64X64;
4404 uint8_t *src_y = cpi->source->y_buffer;
4405 int src_ystride = cpi->source->y_stride;
4406 uint8_t *last_src_y = cpi->last_source->y_buffer;
4407 int last_src_ystride = cpi->last_source->y_stride;
4408 uint64_t avg_source_sse_threshold = 100000; // ~5*5*(64*64)
4409 uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
4410   uint64_t sum_sq_thresh = 10000;  // sum = sqrt(thresh / (64 * 64)) ~ 1.5
4411 #if CONFIG_AV1_HIGHBITDEPTH
4412 MACROBLOCKD *xd = &x->e_mbd;
4413 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) return;
4414 #endif
4415 src_y += shift;
4416 last_src_y += shift;
4417 tmp_variance = cpi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
4418 last_src_ystride, &tmp_sse);
4419 // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
4420 // Detect large lighting change.
4421 if (tmp_variance < (tmp_sse >> 1) && (tmp_sse - tmp_variance) > sum_sq_thresh)
4422 x->content_state_sb = kLowVarHighSumdiff;
4423 else if (tmp_sse < avg_source_sse_threshold)
4424 x->content_state_sb = kLowSad;
4425 else if (tmp_sse > avg_source_sse_threshold_high)
4426 x->content_state_sb = kHighSad;
4427 }
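// A hypothetical worked example of the classification above, using the note
// that for this 64x64 (4096-pixel) block variance = sse - ((sum * sum) >> 12):
// if every pixel differs from the last source by exactly +2, then sum = 8192
// and tmp_sse = 16384, so tmp_variance = 16384 - (8192 * 8192 >> 12) = 0.
// Then tmp_variance < (tmp_sse >> 1) and tmp_sse - tmp_variance = 16384 >
// sum_sq_thresh, so the block is flagged kLowVarHighSumdiff, i.e. a uniform
// shift consistent with a large lighting change.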
4428
4429 static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
4430 TileDataEnc *tile_data,
4431 PC_TREE *const pc_root, TOKENEXTRA **tp,
4432 const int mi_row, const int mi_col,
4433 const int seg_skip) {
4434 AV1_COMMON *const cm = &cpi->common;
4435 MACROBLOCK *const x = &td->mb;
4436 const SPEED_FEATURES *const sf = &cpi->sf;
4437 const TileInfo *const tile_info = &tile_data->tile_info;
4438 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
4439 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
4440 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4441 if (sf->rt_sf.source_metrics_sb_nonrd && sb_size == BLOCK_64X64 &&
4442 cpi->svc.number_spatial_layers <= 1 &&
4443 cm->current_frame.frame_type != KEY_FRAME) {
4444 int shift = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
4445 source_content_sb(cpi, x, shift);
4446 }
4447 if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
4448 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4449 const BLOCK_SIZE bsize =
4450 seg_skip ? sb_size : sf->part_sf.always_this_block_size;
4451 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4452 } else if (cpi->partition_search_skippable_frame) {
4453 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4454 const BLOCK_SIZE bsize =
4455 get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4456 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4457 } else if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
4458 set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, sb_size);
4459 av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
4460 }
4461 assert(sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip ||
4462 cpi->partition_search_skippable_frame ||
4463 sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
4464 td->mb.cb_offset = 0;
4465 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4466 pc_root);
4467 }
4468
4469 // Memset the mbmis in the current superblock to 0
4470 static INLINE void reset_mbmi(CommonModeInfoParams *const mi_params,
4471 BLOCK_SIZE sb_size, int mi_row, int mi_col) {
4472   // Size of the superblock in units of mi (BLOCK_4X4)
4473 const int sb_size_mi = mi_size_wide[sb_size];
4474 const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize];
4475   // Size of the superblock in units of the allocated mi size
4476 const int sb_size_alloc_mi = mi_size_wide[sb_size] / mi_alloc_size_1d;
4477 assert(mi_params->mi_alloc_stride % sb_size_alloc_mi == 0 &&
4478 "mi is not allocated as a multiple of sb!");
4479 assert(mi_params->mi_stride % sb_size_mi == 0 &&
4480 "mi_grid_base is not allocated as a multiple of sb!");
4481
4482 const int mi_rows = mi_size_high[sb_size];
4483 for (int cur_mi_row = 0; cur_mi_row < mi_rows; cur_mi_row++) {
4484 assert(get_mi_grid_idx(mi_params, 0, mi_col + mi_alloc_size_1d) <
4485 mi_params->mi_stride);
4486 const int mi_grid_idx =
4487 get_mi_grid_idx(mi_params, mi_row + cur_mi_row, mi_col);
4488 const int alloc_mi_idx =
4489 get_alloc_mi_idx(mi_params, mi_row + cur_mi_row, mi_col);
4490 memset(&mi_params->mi_grid_base[mi_grid_idx], 0,
4491 sb_size_mi * sizeof(*mi_params->mi_grid_base));
4492 memset(&mi_params->tx_type_map[mi_grid_idx], 0,
4493 sb_size_mi * sizeof(*mi_params->tx_type_map));
4494 if (cur_mi_row % mi_alloc_size_1d == 0) {
4495 memset(&mi_params->mi_alloc[alloc_mi_idx], 0,
4496 sb_size_alloc_mi * sizeof(*mi_params->mi_alloc));
4497 }
4498 }
4499 }
4500
4501 static INLINE void backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats,
4502 const AV1_COMP *cpi, ThreadData *td,
4503 const TileDataEnc *tile_data, int mi_row,
4504 int mi_col) {
4505 MACROBLOCK *x = &td->mb;
4506 MACROBLOCKD *xd = &x->e_mbd;
4507 const TileInfo *tile_info = &tile_data->tile_info;
4508
4509 const AV1_COMMON *cm = &cpi->common;
4510 const int num_planes = av1_num_planes(cm);
4511 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4512
4513 xd->above_txfm_context =
4514 cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
4515 xd->left_txfm_context =
4516 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
4517 save_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);
4518
4519 sb_fp_stats->rd_count = cpi->td.rd_counts;
4520 sb_fp_stats->split_count = cpi->td.mb.txb_split_count;
4521
4522 sb_fp_stats->fc = *td->counts;
4523
4524 memcpy(sb_fp_stats->inter_mode_rd_models, tile_data->inter_mode_rd_models,
4525 sizeof(sb_fp_stats->inter_mode_rd_models));
4526
4527 memcpy(sb_fp_stats->thresh_freq_fact, x->thresh_freq_fact,
4528 sizeof(sb_fp_stats->thresh_freq_fact));
4529
4530 const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
4531 sb_fp_stats->current_qindex =
4532 cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
4533
4534 #if CONFIG_INTERNAL_STATS
4535 memcpy(sb_fp_stats->mode_chosen_counts, cpi->mode_chosen_counts,
4536 sizeof(sb_fp_stats->mode_chosen_counts));
4537 #endif // CONFIG_INTERNAL_STATS
4538 }
4539
4540 static INLINE void restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats,
4541 AV1_COMP *cpi, ThreadData *td,
4542 TileDataEnc *tile_data, int mi_row,
4543 int mi_col) {
4544 MACROBLOCK *x = &td->mb;
4545
4546 const AV1_COMMON *cm = &cpi->common;
4547 const int num_planes = av1_num_planes(cm);
4548 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4549
4550 restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);
4551
4552 cpi->td.rd_counts = sb_fp_stats->rd_count;
4553 cpi->td.mb.txb_split_count = sb_fp_stats->split_count;
4554
4555 *td->counts = sb_fp_stats->fc;
4556
4557 memcpy(tile_data->inter_mode_rd_models, sb_fp_stats->inter_mode_rd_models,
4558 sizeof(sb_fp_stats->inter_mode_rd_models));
4559 memcpy(x->thresh_freq_fact, sb_fp_stats->thresh_freq_fact,
4560 sizeof(sb_fp_stats->thresh_freq_fact));
4561
4562 const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
4563 cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
4564 sb_fp_stats->current_qindex;
4565
4566 #if CONFIG_INTERNAL_STATS
4567 memcpy(cpi->mode_chosen_counts, sb_fp_stats->mode_chosen_counts,
4568 sizeof(sb_fp_stats->mode_chosen_counts));
4569 #endif // CONFIG_INTERNAL_STATS
4570 }
4571
4572 #if !CONFIG_REALTIME_ONLY
4573 static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
4574 int mi_col) {
4575 const AV1_COMMON *cm = &cpi->common;
4576 const CommonModeInfoParams *const mi_params = &cm->mi_params;
4577 MACROBLOCK *x = &td->mb;
4578 const int frame_idx = cpi->gf_group.index;
4579 TplParams *const tpl_data = &cpi->tpl_data;
4580 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
4581 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
4582
4583 av1_zero(x->search_ref_frame);
4584
4585 if (tpl_frame->is_valid == 0) return;
4586 if (!is_frame_tpl_eligible(cpi)) return;
4587 if (frame_idx >= MAX_LAG_BUFFERS) return;
4588 if (cpi->superres_mode != SUPERRES_NONE) return;
4589 if (cpi->oxcf.aq_mode != NO_AQ) return;
4590
4591 const int is_overlay = cpi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
4592 if (is_overlay) {
4593 memset(x->search_ref_frame, 1, sizeof(x->search_ref_frame));
4594 return;
4595 }
4596
4597 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4598 const int tpl_stride = tpl_frame->stride;
4599 int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
4600 const int step = 1 << block_mis_log2;
4601 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4602 const int mi_row_end =
4603 AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
4604 const int mi_col_end =
4605 AOMMIN(mi_size_wide[sb_size] + mi_col, mi_params->mi_cols);
4606
4607 for (int row = mi_row; row < mi_row_end; row += step) {
4608 for (int col = mi_col; col < mi_col_end; col += step) {
4609 const TplDepStats *this_stats =
4610 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
4611 int64_t tpl_pred_error[INTER_REFS_PER_FRAME] = { 0 };
4612       // Find the winning ref frame idx for the current block
4613 int64_t best_inter_cost = this_stats->pred_error[0];
4614 int best_rf_idx = 0;
4615 for (int idx = 1; idx < INTER_REFS_PER_FRAME; ++idx) {
4616 if ((this_stats->pred_error[idx] < best_inter_cost) &&
4617 (this_stats->pred_error[idx] != 0)) {
4618 best_inter_cost = this_stats->pred_error[idx];
4619 best_rf_idx = idx;
4620 }
4621 }
4622 // tpl_pred_error is the pred_error reduction of best_ref w.r.t.
4623 // LAST_FRAME.
4624 tpl_pred_error[best_rf_idx] = this_stats->pred_error[best_rf_idx] -
4625 this_stats->pred_error[LAST_FRAME - 1];
4626
4627 for (int rf_idx = 1; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx)
4628 inter_cost[rf_idx] += tpl_pred_error[rf_idx];
4629 }
4630 }
4631
4632 int rank_index[INTER_REFS_PER_FRAME - 1];
4633 for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
4634 rank_index[idx] = idx + 1;
4635 for (int i = idx; i > 0; --i) {
4636 if (inter_cost[rank_index[i - 1]] > inter_cost[rank_index[i]]) {
4637 const int tmp = rank_index[i - 1];
4638 rank_index[i - 1] = rank_index[i];
4639 rank_index[i] = tmp;
4640 }
4641 }
4642 }
4643
4644 x->search_ref_frame[INTRA_FRAME] = 1;
4645 x->search_ref_frame[LAST_FRAME] = 1;
4646
4647 int cutoff_ref = 0;
4648 for (int idx = 0; idx < INTER_REFS_PER_FRAME - 1; ++idx) {
4649 x->search_ref_frame[rank_index[idx] + LAST_FRAME] = 1;
4650 if (idx > 2) {
4651 if (!cutoff_ref) {
4652         // If the predictive coding gain is smaller than that of the
4653         // previous, more relevant frame by a certain amount (less than
4654         // 1/8 of it here), discard this frame and all frames after it.
4655 if (llabs(inter_cost[rank_index[idx]]) <
4656 llabs(inter_cost[rank_index[idx - 1]]) / 8 ||
4657 inter_cost[rank_index[idx]] == 0)
4658 cutoff_ref = 1;
4659 }
4660
4661 if (cutoff_ref) x->search_ref_frame[rank_index[idx] + LAST_FRAME] = 0;
4662 }
4663 }
4664 }
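// A hypothetical worked example of the ranking and cutoff above: suppose the
// accumulated inter_cost values for ref indices 1..6 are
//   { -8000, -100, -6000, -900, 0, -4000 }
// (more negative means a larger pred_error reduction vs. LAST_FRAME). The
// insertion sort gives rank_index = { 1, 3, 6, 4, 2, 5 }. At idx == 3 the
// candidate's |cost| is 900 >= 4000 / 8, so it is kept; at idx == 4,
// |cost| = 100 < 900 / 8, so cutoff_ref is set and this ref and all
// lower-ranked refs are removed from the search set.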
4665 #endif // !CONFIG_REALTIME_ONLY
4666
4667 // This function initializes the stats for encode_rd_sb.
4668 static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
4669 const TileDataEnc *tile_data,
4670 PC_TREE *pc_root, RD_STATS *rd_cost,
4671 int mi_row, int mi_col,
4672 int gather_tpl_data) {
4673 const AV1_COMMON *cm = &cpi->common;
4674 const TileInfo *tile_info = &tile_data->tile_info;
4675 MACROBLOCK *x = &td->mb;
4676
4677 const SPEED_FEATURES *sf = &cpi->sf;
4678 const int use_simple_motion_search =
4679 (sf->part_sf.simple_motion_search_split ||
4680 sf->part_sf.simple_motion_search_prune_rect ||
4681 sf->part_sf.simple_motion_search_early_term_none ||
4682 sf->part_sf.ml_early_term_after_part_split_level) &&
4683 !frame_is_intra_only(cm);
4684 if (use_simple_motion_search) {
4685 init_simple_motion_search_mvs(pc_root);
4686 }
4687
4688 #if !CONFIG_REALTIME_ONLY
4689 init_ref_frame_space(cpi, td, mi_row, mi_col);
4690 x->sb_energy_level = 0;
4691 x->cnn_output_valid = 0;
4692 if (gather_tpl_data) {
4693 if (cm->delta_q_info.delta_q_present_flag) {
4694 const int num_planes = av1_num_planes(cm);
4695 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4696 setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
4697 av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
4698 }
4699 if (cpi->oxcf.enable_tpl_model) {
4700 adjust_rdmult_tpl_model(cpi, x, mi_row, mi_col);
4701 }
4702 }
4703 #else
4704 (void)tile_info;
4705 (void)mi_row;
4706 (void)mi_col;
4707 (void)gather_tpl_data;
4708 #endif
4709
4710 // Reset hash state for transform/mode rd hash information
4711 reset_hash_records(x, cpi->sf.tx_sf.use_inter_txb_hash);
4712 av1_zero(x->picked_ref_frames_mask);
4713 av1_zero(x->pred_mv);
4714 av1_invalid_rd_stats(rd_cost);
4715 }
4716
4717 static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
4718 TileDataEnc *tile_data,
4719 PC_TREE *const pc_root, TOKENEXTRA **tp,
4720 const int mi_row, const int mi_col,
4721 const int seg_skip) {
4722 AV1_COMMON *const cm = &cpi->common;
4723 MACROBLOCK *const x = &td->mb;
4724 const SPEED_FEATURES *const sf = &cpi->sf;
4725 const TileInfo *const tile_info = &tile_data->tile_info;
4726 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
4727 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
4728 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4729 int dummy_rate;
4730 int64_t dummy_dist;
4731 RD_STATS dummy_rdc;
4732
4733 #if CONFIG_REALTIME_ONLY
4734 (void)seg_skip;
4735 #endif // CONFIG_REALTIME_ONLY
4736
4737 init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col, 1);
4738
4739 if (sf->part_sf.partition_search_type == VAR_BASED_PARTITION) {
4740 set_offsets_without_segment_id(cpi, tile_info, x, mi_row, mi_col, sb_size);
4741 av1_choose_var_based_partitioning(cpi, tile_info, td, x, mi_row, mi_col);
4742 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4743 &dummy_rate, &dummy_dist, 1, pc_root);
4744 }
4745 #if !CONFIG_REALTIME_ONLY
4746 else if (sf->part_sf.partition_search_type == FIXED_PARTITION || seg_skip) {
4747 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4748 const BLOCK_SIZE bsize =
4749 seg_skip ? sb_size : sf->part_sf.always_this_block_size;
4750 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4751 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4752 &dummy_rate, &dummy_dist, 1, pc_root);
4753 } else if (cpi->partition_search_skippable_frame) {
4754 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4755 const BLOCK_SIZE bsize =
4756 get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4757 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4758 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4759 &dummy_rate, &dummy_dist, 1, pc_root);
4760 } else {
4761 // No stats for overlay frames. Exclude key frame.
4762 x->valid_cost_b =
4763 get_tpl_stats_b(cpi, sb_size, mi_row, mi_col, x->intra_cost_b,
4764 x->inter_cost_b, x->mv_b, &x->cost_stride);
4765
4766 reset_partition(pc_root, sb_size);
4767
4768 #if CONFIG_COLLECT_COMPONENT_TIMING
4769 start_timing(cpi, rd_pick_partition_time);
4770 #endif
4771 BLOCK_SIZE max_sq_size = x->max_partition_size;
4772 BLOCK_SIZE min_sq_size = x->min_partition_size;
4773
4774 if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
4775 float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
4776
4777 av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
4778 max_sq_size = AOMMAX(
4779 AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size),
4780 min_sq_size);
4781 }
4782
4783 const int num_passes = cpi->oxcf.sb_multipass_unit_test ? 2 : 1;
4784
4785 if (num_passes == 1) {
4786 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4787 max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4788 pc_root, NULL, SB_SINGLE_PASS, NULL);
4789 } else {
4790 // First pass
4791 SB_FIRST_PASS_STATS sb_fp_stats;
4792 backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
4793 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4794 max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4795 pc_root, NULL, SB_DRY_PASS, NULL);
4796
4797 // Second pass
4798 init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col,
4799 0);
4800 reset_mbmi(&cm->mi_params, sb_size, mi_row, mi_col);
4801 reset_partition(pc_root, sb_size);
4802
4803 restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
4804
4805 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4806 max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
4807 pc_root, NULL, SB_WET_PASS, NULL);
4808 }
4809     // Reset to 0 so that it won't be mistakenly used elsewhere.
4810 x->valid_cost_b = 0;
4811 #if CONFIG_COLLECT_COMPONENT_TIMING
4812 end_timing(cpi, rd_pick_partition_time);
4813 #endif
4814 }
4815 #endif // !CONFIG_REALTIME_ONLY
4816
4817 // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
4818 if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 &&
4819 cm->tiles.cols == 1 && cm->tiles.rows == 1) {
4820 av1_inter_mode_data_fit(tile_data, x->rdmult);
4821 }
4822 }
4823
4824 static AOM_INLINE void set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
4825 const TileInfo *const tile_info,
4826 const int mi_row, const int mi_col) {
4827 AV1_COMMON *const cm = &cpi->common;
4828 const int num_planes = av1_num_planes(cm);
4829 MACROBLOCK *const x = &td->mb;
4830 MACROBLOCKD *const xd = &x->e_mbd;
4831
4832 switch (cpi->oxcf.coeff_cost_upd_freq) {
4833 case COST_UPD_TILE: // Tile level
4834 if (mi_row != tile_info->mi_row_start) break;
4835 AOM_FALLTHROUGH_INTENDED;
4836 case COST_UPD_SBROW: // SB row level in tile
4837 if (mi_col != tile_info->mi_col_start) break;
4838 AOM_FALLTHROUGH_INTENDED;
4839 case COST_UPD_SB: // SB level
4840 if (cpi->sf.inter_sf.disable_sb_level_coeff_cost_upd &&
4841 mi_col != tile_info->mi_col_start)
4842 break;
4843 av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
4844 break;
4845 default: assert(0);
4846 }
4847
4848 switch (cpi->oxcf.mode_cost_upd_freq) {
4849 case COST_UPD_TILE: // Tile level
4850 if (mi_row != tile_info->mi_row_start) break;
4851 AOM_FALLTHROUGH_INTENDED;
4852 case COST_UPD_SBROW: // SB row level in tile
4853 if (mi_col != tile_info->mi_col_start) break;
4854 AOM_FALLTHROUGH_INTENDED;
4855 case COST_UPD_SB: // SB level
4856 av1_fill_mode_rates(cm, x, xd->tile_ctx);
4857 break;
4858 default: assert(0);
4859 }
4860 switch (cpi->oxcf.mv_cost_upd_freq) {
4861 case COST_UPD_OFF: break;
4862 case COST_UPD_TILE: // Tile level
4863 if (mi_row != tile_info->mi_row_start) break;
4864 AOM_FALLTHROUGH_INTENDED;
4865 case COST_UPD_SBROW: // SB row level in tile
4866 if (mi_col != tile_info->mi_col_start) break;
4867 AOM_FALLTHROUGH_INTENDED;
4868 case COST_UPD_SB: // SB level
4869 if (cpi->sf.inter_sf.disable_sb_level_mv_cost_upd &&
4870 mi_col != tile_info->mi_col_start)
4871 break;
4872 av1_fill_mv_costs(xd->tile_ctx, cm->features.cur_frame_force_integer_mv,
4873 cm->features.allow_high_precision_mv, x);
4874 break;
4875 default: assert(0);
4876 }
4877 }
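// The fall-through switches above act as a granularity gate: COST_UPD_TILE
// only reaches the update call on the first SB of a tile, COST_UPD_SBROW on
// the first SB of each SB row, and COST_UPD_SB on every SB. A minimal
// standalone sketch of the same pattern (the helper name and boolean
// parameters are illustrative only):
static INLINE int should_update_costs_example(int upd_freq,
                                              int is_first_sb_row_of_tile,
                                              int is_first_sb_of_row) {
  switch (upd_freq) {
    case COST_UPD_TILE:  // Update only at the first SB of the tile.
      if (!is_first_sb_row_of_tile) return 0;
      AOM_FALLTHROUGH_INTENDED;
    case COST_UPD_SBROW:  // Update at the first SB of each SB row.
      if (!is_first_sb_of_row) return 0;
      AOM_FALLTHROUGH_INTENDED;
    case COST_UPD_SB:  // Update at every SB.
      return 1;
    default: return 0;
  }
}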
4878
4879 static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
4880 TileDataEnc *tile_data, int mi_row,
4881 TOKENEXTRA **tp) {
4882 AV1_COMMON *const cm = &cpi->common;
4883 const TileInfo *const tile_info = &tile_data->tile_info;
4884 MACROBLOCK *const x = &td->mb;
4885 MACROBLOCKD *const xd = &x->e_mbd;
4886 const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
4887 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4888 const int mib_size = cm->seq_params.mib_size;
4889 const int mib_size_log2 = cm->seq_params.mib_size_log2;
4890 const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
4891 const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
4892
4893 #if CONFIG_COLLECT_COMPONENT_TIMING
4894 start_timing(cpi, encode_sb_time);
4895 #endif
4896
4897 // Initialize the left context for the new SB row
4898 av1_zero_left_context(xd);
4899
4900   // Reset the q/lf deltas at the start of each tile (every SB row if row-mt)
4901 if (mi_row == tile_info->mi_row_start || cpi->row_mt) {
4902 if (cm->delta_q_info.delta_q_present_flag)
4903 xd->current_qindex = cm->quant_params.base_qindex;
4904 if (cm->delta_q_info.delta_lf_present_flag) {
4905 av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
4906 }
4907 }
4908 reset_thresh_freq_fact(x);
4909
4910 // Code each SB in the row
4911 for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
4912 mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
4913 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4914 sb_col_in_tile);
4915 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4916 (tile_info->mi_row_start != mi_row)) {
4917 if ((tile_info->mi_col_start == mi_col)) {
4918         // Restore the frame context of the first-column SB
4919 memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
4920 } else {
4921 int wt_left = AVG_CDF_WEIGHT_LEFT;
4922 int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
4923 if (tile_info->mi_col_end > (mi_col + mib_size))
4924 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile, wt_left,
4925 wt_tr);
4926 else
4927 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
4928 wt_left, wt_tr);
4929 }
4930 }
4931
4932 set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
4933
4934 x->color_sensitivity[0] = 0;
4935 x->color_sensitivity[1] = 0;
4936 x->content_state_sb = 0;
4937
4938 PC_TREE *const pc_root = td->pc_root;
4939 pc_root->index = 0;
4940
4941 xd->cur_frame_force_integer_mv = cm->features.cur_frame_force_integer_mv;
4942 td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
4943 x->source_variance = UINT_MAX;
4944 x->simple_motion_pred_sse = UINT_MAX;
4945
4946 const struct segmentation *const seg = &cm->seg;
4947 int seg_skip = 0;
4948 if (seg->enabled) {
4949 const uint8_t *const map =
4950 seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
4951 const int segment_id =
4952 map ? get_segment_id(&cm->mi_params, map, sb_size, mi_row, mi_col)
4953 : 0;
4954 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4955 }
4956
4957 if (use_nonrd_mode) {
4958 encode_nonrd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col,
4959 seg_skip);
4960 } else {
4961 encode_rd_sb(cpi, td, tile_data, pc_root, tp, mi_row, mi_col, seg_skip);
4962 }
4963
4964 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4965 (tile_info->mi_row_end > (mi_row + mib_size))) {
4966 if (sb_cols_in_tile == 1)
4967 memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
4968 else if (sb_col_in_tile >= 1)
4969 memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
4970 sizeof(*xd->tile_ctx));
4971 }
4972 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4973 sb_col_in_tile, sb_cols_in_tile);
4974 }
4975 #if CONFIG_COLLECT_COMPONENT_TIMING
4976 end_timing(cpi, encode_sb_time);
4977 #endif
4978 }
4979
4980 static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
4981 AV1_COMMON *const cm = &cpi->common;
4982 const int num_planes = av1_num_planes(cm);
4983 MACROBLOCK *const x = &cpi->td.mb;
4984 MACROBLOCKD *const xd = &x->e_mbd;
4985
4986 // Copy data over into macro block data structures.
4987 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
4988 cm->seq_params.sb_size);
4989
4990 av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
4991 cm->seq_params.subsampling_y, num_planes);
4992 }
4993
4994 void av1_alloc_tile_data(AV1_COMP *cpi) {
4995 AV1_COMMON *const cm = &cpi->common;
4996 const int tile_cols = cm->tiles.cols;
4997 const int tile_rows = cm->tiles.rows;
4998
4999 if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
5000 CHECK_MEM_ERROR(
5001 cm, cpi->tile_data,
5002 aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
5003
5004 cpi->allocated_tiles = tile_cols * tile_rows;
5005 }
5006
5007 void av1_init_tile_data(AV1_COMP *cpi) {
5008 AV1_COMMON *const cm = &cpi->common;
5009 const int num_planes = av1_num_planes(cm);
5010 const int tile_cols = cm->tiles.cols;
5011 const int tile_rows = cm->tiles.rows;
5012 int tile_col, tile_row;
5013 TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
5014 TOKENLIST *tplist = cpi->tplist[0][0];
5015 unsigned int tile_tok = 0;
5016 int tplist_count = 0;
5017
5018 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
5019 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
5020 TileDataEnc *const tile_data =
5021 &cpi->tile_data[tile_row * tile_cols + tile_col];
5022 TileInfo *const tile_info = &tile_data->tile_info;
5023 av1_tile_init(tile_info, cm, tile_row, tile_col);
5024
5025 cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
5026 pre_tok = cpi->tile_tok[tile_row][tile_col];
5027 tile_tok = allocated_tokens(
5028 *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
5029 cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
5030 tplist = cpi->tplist[tile_row][tile_col];
5031 tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
5032 tile_data->allow_update_cdf = !cm->tiles.large_scale;
5033 tile_data->allow_update_cdf =
5034 tile_data->allow_update_cdf && !cm->features.disable_cdf_update;
5035 tile_data->tctx = *cm->fc;
5036 }
5037 }
5038 }
5039
5040 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
5041 int tile_col, int mi_row) {
5042 AV1_COMMON *const cm = &cpi->common;
5043 const int num_planes = av1_num_planes(cm);
5044 const int tile_cols = cm->tiles.cols;
5045 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
5046 const TileInfo *const tile_info = &this_tile->tile_info;
5047 TOKENEXTRA *tok = NULL;
5048 const int sb_row_in_tile =
5049 (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
5050 const int tile_mb_cols =
5051 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
5052 const int num_mb_rows_in_sb =
5053 ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
5054
5055 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
5056 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
5057 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
5058
5059 encode_sb_row(cpi, td, this_tile, mi_row, &tok);
5060
5061 cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
5062 cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
5063 (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop -
5064 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start);
5065
5066 assert(
5067 (unsigned int)(tok -
5068 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <=
5069 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
5070 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes));
5071
5072 (void)tile_mb_cols;
5073 (void)num_mb_rows_in_sb;
5074 }
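// A worked example of the token-bound arithmetic above, with hypothetical
// dimensions: for a 128x128 superblock, mib_size_log2 + MI_SIZE_LOG2 == 7,
// so num_mb_rows_in_sb = ((1 << 7) + 8) >> 4 = 8 rows of 16x16 macroblocks,
// and a tile 64 mi units (256 pixels) wide gives
// tile_mb_cols = (64 + 2) >> 2 = 16. Both values feed get_token_alloc()
// only as a debug upper bound on the tokens this SB row may produce.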
5075
5076 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
5077 int tile_col) {
5078 AV1_COMMON *const cm = &cpi->common;
5079 TileDataEnc *const this_tile =
5080 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
5081 const TileInfo *const tile_info = &this_tile->tile_info;
5082
5083 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) av1_inter_mode_data_init(this_tile);
5084
5085 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
5086 tile_info->mi_col_end, tile_row);
5087 av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
5088 &td->mb.e_mbd);
5089
5090 if (cpi->oxcf.enable_cfl_intra) cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
5091
5092 av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator);
5093
5094 for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
5095 mi_row += cm->seq_params.mib_size) {
5096 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
5097 }
5098 }
5099
5100 static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
5101 AV1_COMMON *const cm = &cpi->common;
5102 const int tile_cols = cm->tiles.cols;
5103 const int tile_rows = cm->tiles.rows;
5104 int tile_col, tile_row;
5105
5106 if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
5107 av1_alloc_tile_data(cpi);
5108
5109 av1_init_tile_data(cpi);
5110
5111 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
5112 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
5113 TileDataEnc *const this_tile =
5114 &cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
5115 cpi->td.intrabc_used = 0;
5116 cpi->td.deltaq_used = 0;
5117 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
5118 cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
5119 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
5120 cpi->intrabc_used |= cpi->td.intrabc_used;
5121 cpi->deltaq_used |= cpi->td.deltaq_used;
5122 }
5123 }
5124 }
5125
5126 #define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search
5127 static int gm_get_params_cost(const WarpedMotionParams *gm,
5128 const WarpedMotionParams *ref_gm, int allow_hp) {
5129 int params_cost = 0;
5130 int trans_bits, trans_prec_diff;
5131 switch (gm->wmtype) {
5132 case AFFINE:
5133 case ROTZOOM:
5134 params_cost += aom_count_signed_primitive_refsubexpfin(
5135 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5136 (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
5137 (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5138 params_cost += aom_count_signed_primitive_refsubexpfin(
5139 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5140 (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
5141 (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
5142 if (gm->wmtype >= AFFINE) {
5143 params_cost += aom_count_signed_primitive_refsubexpfin(
5144 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5145 (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
5146 (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
5147 params_cost += aom_count_signed_primitive_refsubexpfin(
5148 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5149 (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
5150 (1 << GM_ALPHA_PREC_BITS),
5151 (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5152 }
5153 AOM_FALLTHROUGH_INTENDED;
5154 case TRANSLATION:
5155 trans_bits = (gm->wmtype == TRANSLATION)
5156 ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
5157 : GM_ABS_TRANS_BITS;
5158 trans_prec_diff = (gm->wmtype == TRANSLATION)
5159 ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
5160 : GM_TRANS_PREC_DIFF;
5161 params_cost += aom_count_signed_primitive_refsubexpfin(
5162 (1 << trans_bits) + 1, SUBEXPFIN_K,
5163 (ref_gm->wmmat[0] >> trans_prec_diff),
5164 (gm->wmmat[0] >> trans_prec_diff));
5165 params_cost += aom_count_signed_primitive_refsubexpfin(
5166 (1 << trans_bits) + 1, SUBEXPFIN_K,
5167 (ref_gm->wmmat[1] >> trans_prec_diff),
5168 (gm->wmmat[1] >> trans_prec_diff));
5169 AOM_FALLTHROUGH_INTENDED;
5170 case IDENTITY: break;
5171 default: assert(0);
5172 }
5173 return (params_cost << AV1_PROB_COST_SHIFT);
5174 }
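// The value returned above is a bit count scaled into the encoder's
// fixed-point rate domain: e.g. if the subexponential codes sum to 30 bits,
// the cost is 30 << AV1_PROB_COST_SHIFT, so it can be added directly to the
// other rate terms used in the global motion RD decision.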
5175
5176 static int do_gm_search_logic(SPEED_FEATURES *const sf, int frame) {
5177 (void)frame;
5178 switch (sf->gm_sf.gm_search_type) {
5179 case GM_FULL_SEARCH: return 1;
5180 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3:
5181 return !(frame == LAST2_FRAME || frame == LAST3_FRAME);
5182 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2:
5183 return !(frame == LAST2_FRAME || frame == LAST3_FRAME ||
5184 (frame == ALTREF2_FRAME));
5185 case GM_DISABLE_SEARCH: return 0;
5186 default: assert(0);
5187 }
5188 return 1;
5189 }
5190
5191 // Set the relative distance of each reference frame w.r.t. the current frame
5192 static AOM_INLINE void set_rel_frame_dist(AV1_COMP *cpi) {
5193 const AV1_COMMON *const cm = &cpi->common;
5194 const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
5195 MV_REFERENCE_FRAME ref_frame;
5196 int min_past_dist = INT32_MAX, min_future_dist = INT32_MAX;
5197 cpi->nearest_past_ref = NONE_FRAME;
5198 cpi->nearest_future_ref = NONE_FRAME;
5199 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
5200 cpi->ref_relative_dist[ref_frame - LAST_FRAME] = 0;
5201 if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
5202 int dist = av1_encoder_get_relative_dist(
5203 order_hint_info,
5204 cm->cur_frame->ref_display_order_hint[ref_frame - LAST_FRAME],
5205 cm->current_frame.display_order_hint);
5206 cpi->ref_relative_dist[ref_frame - LAST_FRAME] = dist;
5207 // Get the nearest ref_frame in the past
5208 if (abs(dist) < min_past_dist && dist < 0) {
5209 cpi->nearest_past_ref = ref_frame;
5210 min_past_dist = abs(dist);
5211 }
5212 // Get the nearest ref_frame in the future
5213 if (dist < min_future_dist && dist > 0) {
5214 cpi->nearest_future_ref = ref_frame;
5215 min_future_dist = dist;
5216 }
5217 }
5218 }
5219 }
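// A hypothetical worked example for the scan above: with display order
// hint 10 for the current frame and enabled refs at hints 7, 9 and 12, the
// relative distances are -3, -1 and +2, so nearest_past_ref is set to the
// ref at hint 9 (|-1| < |-3|) and nearest_future_ref to the ref at hint 12.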
5220
5221 static INLINE int refs_are_one_sided(const AV1_COMMON *cm) {
5222 assert(!frame_is_intra_only(cm));
5223
5224 int one_sided_refs = 1;
5225 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
5226 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
5227 if (buf == NULL) continue;
5228
5229 const int ref_display_order_hint = buf->display_order_hint;
5230 if (av1_encoder_get_relative_dist(
5231 &cm->seq_params.order_hint_info, ref_display_order_hint,
5232 (int)cm->current_frame.display_order_hint) > 0) {
5233 one_sided_refs = 0; // bwd reference
5234 break;
5235 }
5236 }
5237 return one_sided_refs;
5238 }
5239
5240 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
5241 int ref_order_hint[2]) {
5242 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
5243 ref_order_hint[0] = ref_order_hint[1] = 0;
5244 if (!skip_mode_info->skip_mode_allowed) return;
5245
5246 const RefCntBuffer *const buf_0 =
5247 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
5248 const RefCntBuffer *const buf_1 =
5249 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
5250 assert(buf_0 != NULL && buf_1 != NULL);
5251
5252 ref_order_hint[0] = buf_0->order_hint;
5253 ref_order_hint[1] = buf_1->order_hint;
5254 }
5255
5256 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
5257 AV1_COMMON *const cm = &cpi->common;
5258
5259 av1_setup_skip_mode_allowed(cm);
5260 if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
5261
5262   // Turn off skip mode if the temporal distances of the reference pair to the
5263   // current frame differ by more than 1 frame.
5264 const int cur_offset = (int)cm->current_frame.order_hint;
5265 int ref_offset[2];
5266 get_skip_mode_ref_offsets(cm, ref_offset);
5267 const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
5268 cur_offset, ref_offset[0]);
5269 const int cur_to_ref1 = abs(get_relative_dist(&cm->seq_params.order_hint_info,
5270 cur_offset, ref_offset[1]));
5271 if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
5272
5273 // High Latency: Turn off skip mode if all refs are fwd.
5274 if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;
5275
5276 static const int flag_list[REF_FRAMES] = { 0,
5277 AOM_LAST_FLAG,
5278 AOM_LAST2_FLAG,
5279 AOM_LAST3_FLAG,
5280 AOM_GOLD_FLAG,
5281 AOM_BWD_FLAG,
5282 AOM_ALT2_FLAG,
5283 AOM_ALT_FLAG };
5284 const int ref_frame[2] = {
5285 cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
5286 cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
5287 };
5288 if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) ||
5289 !(cpi->ref_frame_flags & flag_list[ref_frame[1]]))
5290 return 0;
5291
5292 return 1;
5293 }
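// A hypothetical worked example of the distance test above: with order
// hints cur = 10, ref0 = 9 and ref1 = 12, cur_to_ref0 = 1 and
// |cur_to_ref1| = 2, so |1 - 2| = 1 and skip mode stays enabled; if ref1
// sat at hint 13 instead, |1 - 3| = 2 > 1 and skip mode would be disabled.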
5294
5295 // Function to decide if we can skip the global motion parameter computation
5296 // for a particular ref frame
5297 static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
5298 if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
5299 cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
5300 return get_relative_dist(
5301 &cm->seq_params.order_hint_info,
5302 cm->cur_frame->ref_order_hints[ref_frame - LAST_FRAME],
5303 cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <= 0;
5304 }
5305 return 0;
5306 }
5307
5308 static AOM_INLINE void set_default_interp_skip_flags(
5309 const AV1_COMMON *cm, InterpSearchFlags *interp_search_flags) {
5310 const int num_planes = av1_num_planes(cm);
5311 interp_search_flags->default_interp_skip_flags =
5312 (num_planes == 1) ? INTERP_SKIP_LUMA_EVAL_CHROMA
5313 : INTERP_SKIP_LUMA_SKIP_CHROMA;
5314 }
5315
5316 // TODO(Remya): Can include erroradv_prod_tr[] for threshold calculation
5317 static INLINE int64_t calc_erroradv_threshold(AV1_COMP *cpi,
5318 int64_t ref_frame_error) {
5319 if (!cpi->sf.gm_sf.disable_adaptive_warp_error_thresh)
5320 return (int64_t)(
5321 ref_frame_error * erroradv_tr[cpi->sf.gm_sf.gm_erroradv_type] + 0.5);
5322 else
5323 return INT64_MAX;
5324 }
5325
5326 static void compute_global_motion_for_ref_frame(
5327 AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES], int frame,
5328 int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5329 MotionModel *params_by_motion, uint8_t *segment_map,
5330 const int segment_map_w, const int segment_map_h,
5331 const WarpedMotionParams *ref_params) {
5332 ThreadData *const td = &cpi->td;
5333 MACROBLOCK *const x = &td->mb;
5334 AV1_COMMON *const cm = &cpi->common;
5335 MACROBLOCKD *const xd = &x->e_mbd;
5336 int i;
5337 // clang-format off
5338 static const double kIdentityParams[MAX_PARAMDIM - 1] = {
5339 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
5340 };
5341 // clang-format on
5342 WarpedMotionParams tmp_wm_params;
5343 const double *params_this_motion;
5344 int inliers_by_motion[RANSAC_NUM_MOTIONS];
5345 assert(ref_buf[frame] != NULL);
5346 if (*num_frm_corners < 0) {
5347     // Compute interest points using the FAST feature detector
5348 *num_frm_corners = av1_fast_corner_detect(
5349 frm_buffer, cpi->source->y_width, cpi->source->y_height,
5350 cpi->source->y_stride, frm_corners, MAX_CORNERS);
5351 }
5352 TransformationType model;
5353
5354 aom_clear_system_state();
5355
5356 // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
5357 const int do_adaptive_gm_estimation = 0;
5358
5359 const int ref_frame_dist = get_relative_dist(
5360 &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
5361 cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
5362 const GlobalMotionEstimationType gm_estimation_type =
5363 cm->seq_params.order_hint_info.enable_order_hint &&
5364 abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
5365 ? GLOBAL_MOTION_DISFLOW_BASED
5366 : GLOBAL_MOTION_FEATURE_BASED;
5367 for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
5368 int64_t best_warp_error = INT64_MAX;
5369 // Initially set all params to identity.
5370 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5371 memcpy(params_by_motion[i].params, kIdentityParams,
5372 (MAX_PARAMDIM - 1) * sizeof(*(params_by_motion[i].params)));
5373 params_by_motion[i].num_inliers = 0;
5374 }
5375
5376 av1_compute_global_motion(
5377 model, frm_buffer, cpi->source->y_width, cpi->source->y_height,
5378 cpi->source->y_stride, frm_corners, *num_frm_corners, ref_buf[frame],
5379 cpi->common.seq_params.bit_depth, gm_estimation_type, inliers_by_motion,
5380 params_by_motion, RANSAC_NUM_MOTIONS);
5381 int64_t ref_frame_error = 0;
5382 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5383 if (inliers_by_motion[i] == 0) continue;
5384
5385 params_this_motion = params_by_motion[i].params;
5386 av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
5387
5388 if (tmp_wm_params.wmtype != IDENTITY) {
5389 av1_compute_feature_segmentation_map(
5390 segment_map, segment_map_w, segment_map_h,
5391 params_by_motion[i].inliers, params_by_motion[i].num_inliers);
5392
5393 ref_frame_error = av1_segmented_frame_error(
5394 is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
5395 ref_buf[frame]->y_stride, cpi->source->y_buffer,
5396 cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride,
5397 segment_map, segment_map_w);
5398
5399 int64_t erroradv_threshold =
5400 calc_erroradv_threshold(cpi, ref_frame_error);
5401
5402 const int64_t warp_error = av1_refine_integerized_param(
5403 &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd), xd->bd,
5404 ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
5405 ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
5406 cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height,
5407 cpi->source->y_stride, GM_REFINEMENT_COUNT, best_warp_error,
5408 segment_map, segment_map_w, erroradv_threshold);
5409
5410 if (warp_error < best_warp_error) {
5411 best_warp_error = warp_error;
5412           // Save the wm_params modified by av1_refine_integerized_param()
5413           // rather than the motion index, to avoid re-running the
5414           // refinement below.
5415 memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
5416 sizeof(WarpedMotionParams));
5417 }
5418 }
5419 }
5420 if (cm->global_motion[frame].wmtype <= AFFINE)
5421 if (!av1_get_shear_params(&cm->global_motion[frame]))
5422 cm->global_motion[frame] = default_warp_params;
5423
5424 if (cm->global_motion[frame].wmtype == TRANSLATION) {
5425 cm->global_motion[frame].wmmat[0] =
5426 convert_to_trans_prec(cm->features.allow_high_precision_mv,
5427 cm->global_motion[frame].wmmat[0]) *
5428 GM_TRANS_ONLY_DECODE_FACTOR;
5429 cm->global_motion[frame].wmmat[1] =
5430 convert_to_trans_prec(cm->features.allow_high_precision_mv,
5431 cm->global_motion[frame].wmmat[1]) *
5432 GM_TRANS_ONLY_DECODE_FACTOR;
5433 }
5434
5435 if (cm->global_motion[frame].wmtype == IDENTITY) continue;
5436
5437 if (ref_frame_error == 0) continue;
5438
5439 // If the best error advantage found doesn't meet the threshold for
5440 // this motion type, revert to IDENTITY.
5441 if (!av1_is_enough_erroradvantage(
5442 (double)best_warp_error / ref_frame_error,
5443 gm_get_params_cost(&cm->global_motion[frame], ref_params,
5444 cm->features.allow_high_precision_mv),
5445 cpi->sf.gm_sf.gm_erroradv_type)) {
5446 cm->global_motion[frame] = default_warp_params;
5447 }
5448
5449 if (cm->global_motion[frame].wmtype != IDENTITY) break;
5450 }
5451
5452 aom_clear_system_state();
5453 }
5454
5455 typedef struct {
5456 int distance;
5457 MV_REFERENCE_FRAME frame;
5458 } FrameDistPair;
5459
5460 static INLINE void update_valid_ref_frames_for_gm(
5461 AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES],
5462 FrameDistPair *past_ref_frame, FrameDistPair *future_ref_frame,
5463 int *num_past_ref_frames, int *num_future_ref_frames) {
5464 AV1_COMMON *const cm = &cpi->common;
5465 const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
5466 for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
5467 const MV_REFERENCE_FRAME ref_frame[2] = { frame, NONE_FRAME };
5468 RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
5469 const int ref_disabled =
5470 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[frame]);
5471 ref_buf[frame] = NULL;
5472 cm->global_motion[frame] = default_warp_params;
5473 // Skip global motion estimation for invalid ref frames
5474 if (buf == NULL ||
5475 (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) {
5476 cpi->gm_info.params_cost[frame] = 0;
5477 continue;
5478 } else {
5479 ref_buf[frame] = &buf->buf;
5480 }
5481
5482 if (ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
5483 ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
5484 do_gm_search_logic(&cpi->sf, frame) &&
5485 !prune_ref_by_selective_ref_frame(
5486 cpi, NULL, ref_frame, cm->cur_frame->ref_display_order_hint) &&
5487 !(cpi->sf.gm_sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
5488 assert(ref_buf[frame] != NULL);
5489 int relative_frame_dist = av1_encoder_get_relative_dist(
5490 order_hint_info, buf->display_order_hint,
5491 cm->cur_frame->display_order_hint);
5492 // Populate past and future ref frames
5493 if (relative_frame_dist <= 0) {
5494 past_ref_frame[*num_past_ref_frames].distance =
5495 abs(relative_frame_dist);
5496 past_ref_frame[*num_past_ref_frames].frame = frame;
5497 (*num_past_ref_frames)++;
5498 } else {
5499 future_ref_frame[*num_future_ref_frames].distance =
5500 abs(relative_frame_dist);
5501 future_ref_frame[*num_future_ref_frames].frame = frame;
5502 (*num_future_ref_frames)++;
5503 }
5504 }
5505 }
5506 }
5507
5508 static INLINE void compute_gm_for_valid_ref_frames(
5509 AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES], int frame,
5510 int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5511 MotionModel *params_by_motion, uint8_t *segment_map,
5512 const int segment_map_w, const int segment_map_h) {
5513 AV1_COMMON *const cm = &cpi->common;
5514 GlobalMotionInfo *const gm_info = &cpi->gm_info;
5515 const WarpedMotionParams *ref_params =
5516 cm->prev_frame ? &cm->prev_frame->global_motion[frame]
5517 : &default_warp_params;
5518
5519 compute_global_motion_for_ref_frame(
5520 cpi, ref_buf, frame, num_frm_corners, frm_corners, frm_buffer,
5521 params_by_motion, segment_map, segment_map_w, segment_map_h, ref_params);
5522
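  // Total signaling cost of the selected model: the cost of coding its
  // parameters relative to the reference model, plus the extra cost of coding
  // its type rather than IDENTITY.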
5523 gm_info->params_cost[frame] =
5524 gm_get_params_cost(&cm->global_motion[frame], ref_params,
5525 cm->features.allow_high_precision_mv) +
5526 gm_info->type_cost[cm->global_motion[frame].wmtype] -
5527 gm_info->type_cost[IDENTITY];
5528 }
5529
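// qsort() comparator: orders FrameDistPair entries by increasing distance from
// the current frame.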
5530 static int compare_distance(const void *a, const void *b) {
5531 const int diff =
5532 ((FrameDistPair *)a)->distance - ((FrameDistPair *)b)->distance;
5533 if (diff > 0)
5534 return 1;
5535 else if (diff < 0)
5536 return -1;
5537 return 0;
5538 }
5539
5540 static INLINE void compute_global_motion_for_references(
5541 AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES],
5542 FrameDistPair reference_frame[REF_FRAMES - 1], int num_ref_frames,
5543 int *num_frm_corners, int *frm_corners, unsigned char *frm_buffer,
5544 MotionModel *params_by_motion, uint8_t *segment_map,
5545 const int segment_map_w, const int segment_map_h) {
5546 AV1_COMMON *const cm = &cpi->common;
5547 // Compute global motion w.r.t. reference frames starting from the nearest ref
5548 // frame in a given direction
5549 for (int frame = 0; frame < num_ref_frames; frame++) {
5550 int ref_frame = reference_frame[frame].frame;
5551 compute_gm_for_valid_ref_frames(cpi, ref_buf, ref_frame, num_frm_corners,
5552 frm_corners, frm_buffer, params_by_motion,
5553 segment_map, segment_map_w, segment_map_h);
5554     // If global motion w.r.t. the current ref frame is
5555     // INVALID/TRANSLATION/IDENTITY, skip the evaluation of global motion
5556     // w.r.t. the remaining ref frames in that direction. This early exit is
5557     // disabled when the ref frame's distance from the current frame is zero,
5558     // e.g. source_alt_ref_frame w.r.t. ARF frames.
5559 if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search &&
5560 reference_frame[frame].distance != 0 &&
5561 cm->global_motion[ref_frame].wmtype != ROTZOOM)
5562 break;
5563 }
5564 }
5565
5566 static AOM_INLINE void setup_prune_ref_frame_mask(AV1_COMP *cpi) {
5567 if (!cpi->sf.rt_sf.use_nonrd_pick_mode &&
5568 cpi->sf.inter_sf.selective_ref_frame >= 2) {
5569 AV1_COMMON *const cm = &cpi->common;
5570 const OrderHintInfo *const order_hint_info =
5571 &cm->seq_params.order_hint_info;
5572 const int cur_frame_display_order_hint =
5573 cm->current_frame.display_order_hint;
5574 unsigned int *ref_display_order_hint =
5575 cm->cur_frame->ref_display_order_hint;
5576 const int arf2_dist = av1_encoder_get_relative_dist(
5577 order_hint_info, ref_display_order_hint[ALTREF2_FRAME - LAST_FRAME],
5578 cur_frame_display_order_hint);
5579 const int bwd_dist = av1_encoder_get_relative_dist(
5580 order_hint_info, ref_display_order_hint[BWDREF_FRAME - LAST_FRAME],
5581 cur_frame_display_order_hint);
5582
5583 for (int ref_idx = REF_FRAMES; ref_idx < MODE_CTX_REF_FRAMES; ++ref_idx) {
5584 MV_REFERENCE_FRAME rf[2];
5585 av1_set_ref_frame(rf, ref_idx);
5586 if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) ||
5587 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]])) {
5588 continue;
5589 }
5590
5591 if (!cpi->all_one_sided_refs) {
5592 int ref_dist[2];
5593 for (int i = 0; i < 2; ++i) {
5594 ref_dist[i] = av1_encoder_get_relative_dist(
5595 order_hint_info, ref_display_order_hint[rf[i] - LAST_FRAME],
5596 cur_frame_display_order_hint);
5597 }
5598
5599 // One-sided compound is used only when all reference frames are
5600 // one-sided.
5601 if ((ref_dist[0] > 0) == (ref_dist[1] > 0)) {
5602 cpi->prune_ref_frame_mask |= 1 << ref_idx;
5603 }
5604 }
5605
5606 if (cpi->sf.inter_sf.selective_ref_frame >= 4 &&
5607 (rf[0] == ALTREF2_FRAME || rf[1] == ALTREF2_FRAME) &&
5608 (cpi->ref_frame_flags & av1_ref_frame_flag_list[BWDREF_FRAME])) {
5609 // Check if both ALTREF2_FRAME and BWDREF_FRAME are future references.
5610 if (arf2_dist > 0 && bwd_dist > 0 && bwd_dist <= arf2_dist) {
5611 // Drop ALTREF2_FRAME as a reference if BWDREF_FRAME is a closer
5612 // reference to the current frame than ALTREF2_FRAME
5613 cpi->prune_ref_frame_mask |= 1 << ref_idx;
5614 }
5615 }
5616 }
5617 }
5618 }
5619
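// When set to 1, the precomputed ref-frame displacement indices in gf_group
// are cross-checked against values rederived from the order hints, relative to
// the GOLDEN frame (debug aid only).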
5620 #define CHECK_PRECOMPUTED_REF_FRAME_MAP 0
5621
5622 static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
5623 ThreadData *const td = &cpi->td;
5624 MACROBLOCK *const x = &td->mb;
5625 AV1_COMMON *const cm = &cpi->common;
5626 CommonModeInfoParams *const mi_params = &cm->mi_params;
5627 FeatureFlags *const features = &cm->features;
5628 MACROBLOCKD *const xd = &x->e_mbd;
5629 RD_COUNTS *const rdc = &cpi->td.rd_counts;
5630 GlobalMotionInfo *const gm_info = &cpi->gm_info;
5631 FrameProbInfo *const frame_probs = &cpi->frame_probs;
5632 IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
5633 int i;
5634
5635 if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
5636 mi_params->setup_mi(mi_params);
5637 }
5638
5639 set_mi_offsets(mi_params, xd, 0, 0);
5640
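  // Select the 4x4 forward transform; the low-precision variant is only
  // available in builds without high-bitdepth support.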
5641 #if CONFIG_AV1_HIGHBITDEPTH
5642 x->fwd_txfm4x4 = aom_fdct4x4;
5643 #else
5644 x->fwd_txfm4x4 = aom_fdct4x4_lp;
5645 #endif
5646
5647 av1_zero(*td->counts);
5648 av1_zero(rdc->comp_pred_diff);
5649 av1_zero(rdc->tx_type_used);
5650 av1_zero(rdc->obmc_used);
5651 av1_zero(rdc->warped_used);
5652
5653 // Reset the flag.
5654 cpi->intrabc_used = 0;
5655 // Need to disable intrabc when superres is selected
5656 if (av1_superres_scaled(cm)) {
5657 features->allow_intrabc = 0;
5658 }
5659
5660 features->allow_intrabc &= (cpi->oxcf.enable_intrabc);
5661
5662 if (features->allow_warped_motion &&
5663 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
5664 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5665 if (frame_probs->warped_probs[update_type] <
5666 cpi->sf.inter_sf.prune_warped_prob_thresh)
5667 features->allow_warped_motion = 0;
5668 }
5669
5670 int hash_table_created = 0;
5671 if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
5672 !cpi->sf.rt_sf.use_nonrd_pick_mode) {
5673 // TODO(any): move this outside of the recoding loop to avoid recalculating
5674 // the hash table.
5675 // add to hash table
5676 const int pic_width = cpi->source->y_crop_width;
5677 const int pic_height = cpi->source->y_crop_height;
5678 uint32_t *block_hash_values[2][2];
5679 int8_t *is_block_same[2][3];
5680 int k, j;
5681
5682 for (k = 0; k < 2; k++) {
5683 for (j = 0; j < 2; j++) {
5684 CHECK_MEM_ERROR(cm, block_hash_values[k][j],
5685 aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
5686 }
5687
5688 for (j = 0; j < 3; j++) {
5689 CHECK_MEM_ERROR(cm, is_block_same[k][j],
5690 aom_malloc(sizeof(int8_t) * pic_width * pic_height));
5691 }
5692 }
5693
5694 av1_hash_table_init(intrabc_hash_info);
5695 av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table);
5696 hash_table_created = 1;
5697 av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
5698 block_hash_values[0], is_block_same[0]);
5699     // Hash data generated for screen content is used for intraBC ME.
5700 const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
5701 const int max_sb_size =
5702 (1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2));
5703 int src_idx = 0;
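    // Build hash values bottom-up: each pass doubles the square block size,
    // ping-ponging between the two hash buffers, and block sizes of at least
    // the MI allocation size are inserted into the intraBC hash map.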
5704 for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
5705 const int dst_idx = !src_idx;
5706 av1_generate_block_hash_value(
5707 intrabc_hash_info, cpi->source, size, block_hash_values[src_idx],
5708 block_hash_values[dst_idx], is_block_same[src_idx],
5709 is_block_same[dst_idx]);
5710 if (size >= min_alloc_size) {
5711 av1_add_to_hash_map_by_row_with_precal_data(
5712 &intrabc_hash_info->intrabc_hash_table, block_hash_values[dst_idx],
5713 is_block_same[dst_idx][2], pic_width, pic_height, size);
5714 }
5715 }
5716
5717 for (k = 0; k < 2; k++) {
5718 for (j = 0; j < 2; j++) {
5719 aom_free(block_hash_values[k][j]);
5720 }
5721
5722 for (j = 0; j < 3; j++) {
5723 aom_free(is_block_same[k][j]);
5724 }
5725 }
5726 }
5727
5728 const CommonQuantParams *quant_params = &cm->quant_params;
5729 for (i = 0; i < MAX_SEGMENTS; ++i) {
5730 const int qindex =
5731 cm->seg.enabled ? av1_get_qindex(&cm->seg, i, quant_params->base_qindex)
5732 : quant_params->base_qindex;
5733 xd->lossless[i] =
5734 qindex == 0 && quant_params->y_dc_delta_q == 0 &&
5735 quant_params->u_dc_delta_q == 0 && quant_params->u_ac_delta_q == 0 &&
5736 quant_params->v_dc_delta_q == 0 && quant_params->v_ac_delta_q == 0;
5737 if (xd->lossless[i]) cpi->enc_seg.has_lossless_segment = 1;
5738 xd->qindex[i] = qindex;
5739 if (xd->lossless[i]) {
5740 cpi->optimize_seg_arr[i] = NO_TRELLIS_OPT;
5741 } else {
5742 cpi->optimize_seg_arr[i] = cpi->sf.rd_sf.optimize_coefficients;
5743 }
5744 }
5745 features->coded_lossless = is_coded_lossless(cm, xd);
5746 features->all_lossless = features->coded_lossless && !av1_superres_scaled(cm);
5747
5748 // Fix delta q resolution for the moment
5749 cm->delta_q_info.delta_q_res = 0;
5750 if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE)
5751 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_OBJECTIVE;
5752 else if (cpi->oxcf.deltaq_mode == DELTA_Q_PERCEPTUAL)
5753 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES_PERCEPTUAL;
5754 // Set delta_q_present_flag before it is used for the first time
5755 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
5756 cm->delta_q_info.delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
5757
5758   // Turn off cm->delta_q_info.delta_q_present_flag if objective delta_q is used
5759   // for ineligible frames; that effectively turns off row_mt usage. Note that
5760   // objective delta_q and tpl-eligible frames are currently only altref
5761   // frames.
5762 if (cm->delta_q_info.delta_q_present_flag) {
5763 if (cpi->oxcf.deltaq_mode == DELTA_Q_OBJECTIVE &&
5764 !is_frame_tpl_eligible(cpi))
5765 cm->delta_q_info.delta_q_present_flag = 0;
5766 }
5767
5768 // Reset delta_q_used flag
5769 cpi->deltaq_used = 0;
5770
5771 cm->delta_q_info.delta_lf_present_flag =
5772 cm->delta_q_info.delta_q_present_flag && cpi->oxcf.deltalf_mode;
5773 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
5774
5775 // update delta_q_present_flag and delta_lf_present_flag based on
5776 // base_qindex
5777 cm->delta_q_info.delta_q_present_flag &= quant_params->base_qindex > 0;
5778 cm->delta_q_info.delta_lf_present_flag &= quant_params->base_qindex > 0;
5779
5780 av1_frame_init_quantizer(cpi);
5781 av1_initialize_rd_consts(cpi);
5782 av1_initialize_me_consts(cpi, x, quant_params->base_qindex);
5783
5784 init_encode_frame_mb_context(cpi);
5785 set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
5786 if (cm->prev_frame && cm->prev_frame->seg.enabled)
5787 cm->last_frame_seg_map = cm->prev_frame->seg_map;
5788 else
5789 cm->last_frame_seg_map = NULL;
5790 if (features->allow_intrabc || features->coded_lossless) {
5791 av1_set_default_ref_deltas(cm->lf.ref_deltas);
5792 av1_set_default_mode_deltas(cm->lf.mode_deltas);
5793 } else if (cm->prev_frame) {
5794 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
5795 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
5796 }
5797 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
5798 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
5799
5800 cpi->all_one_sided_refs =
5801 frame_is_intra_only(cm) ? 0 : refs_are_one_sided(cm);
5802
5803 cpi->prune_ref_frame_mask = 0;
5804 // Figure out which ref frames can be skipped at frame level.
5805 setup_prune_ref_frame_mask(cpi);
5806
5807 x->txb_split_count = 0;
5808 #if CONFIG_SPEED_STATS
5809 x->tx_search_count = 0;
5810 #endif // CONFIG_SPEED_STATS
5811
5812 #if CONFIG_COLLECT_COMPONENT_TIMING
5813 start_timing(cpi, av1_compute_global_motion_time);
5814 #endif
5815 av1_zero(rdc->global_motion_used);
5816 av1_zero(gm_info->params_cost);
5817 if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
5818 cpi->oxcf.enable_global_motion && !gm_info->search_done) {
5819 YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
5820 MotionModel params_by_motion[RANSAC_NUM_MOTIONS];
5821 for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
5822       memset(&params_by_motion[m], 0, sizeof(params_by_motion[m]));
5823 params_by_motion[m].inliers =
5824 aom_malloc(sizeof(*(params_by_motion[m].inliers)) * 2 * MAX_CORNERS);
5825 }
5826
5827 int num_frm_corners = -1;
5828 int frm_corners[2 * MAX_CORNERS];
5829 unsigned char *frm_buffer = cpi->source->y_buffer;
5830 if (cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) {
5831 // The frame buffer is 16-bit, so we need to convert to 8 bits for the
5832 // following code. We cache the result until the frame is released.
5833 frm_buffer =
5834 av1_downconvert_frame(cpi->source, cpi->common.seq_params.bit_depth);
5835 }
5836 const int segment_map_w =
5837 (cpi->source->y_width + WARP_ERROR_BLOCK) >> WARP_ERROR_BLOCK_LOG;
5838 const int segment_map_h =
5839 (cpi->source->y_height + WARP_ERROR_BLOCK) >> WARP_ERROR_BLOCK_LOG;
5840
5841 uint8_t *segment_map =
5842 aom_malloc(sizeof(*segment_map) * segment_map_w * segment_map_h);
5843 memset(segment_map, 0,
5844 sizeof(*segment_map) * segment_map_w * segment_map_h);
5845
5846 FrameDistPair future_ref_frame[REF_FRAMES - 1] = {
5847 { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5848 { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5849 { -1, NONE_FRAME }
5850 };
5851 FrameDistPair past_ref_frame[REF_FRAMES - 1] = {
5852 { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5853 { -1, NONE_FRAME }, { -1, NONE_FRAME }, { -1, NONE_FRAME },
5854 { -1, NONE_FRAME }
5855 };
5856 int num_past_ref_frames = 0;
5857 int num_future_ref_frames = 0;
5858     // Populate ref_buf with the valid ref frames for global motion estimation
5859 update_valid_ref_frames_for_gm(cpi, ref_buf, past_ref_frame,
5860 future_ref_frame, &num_past_ref_frames,
5861 &num_future_ref_frames);
5862
5863     // Sort the ref frames in ascending order of their distance from the
5864     // current frame.
5865 qsort(past_ref_frame, num_past_ref_frames, sizeof(past_ref_frame[0]),
5866 compare_distance);
5867 qsort(future_ref_frame, num_future_ref_frames, sizeof(future_ref_frame[0]),
5868 compare_distance);
5869
5870 // Compute global motion w.r.t. past reference frames
5871 if (num_past_ref_frames > 0)
5872 compute_global_motion_for_references(
5873 cpi, ref_buf, past_ref_frame, num_past_ref_frames, &num_frm_corners,
5874 frm_corners, frm_buffer, params_by_motion, segment_map, segment_map_w,
5875 segment_map_h);
5876
5877 // Compute global motion w.r.t. future reference frames
5878 if (num_future_ref_frames > 0)
5879 compute_global_motion_for_references(
5880 cpi, ref_buf, future_ref_frame, num_future_ref_frames,
5881 &num_frm_corners, frm_corners, frm_buffer, params_by_motion,
5882 segment_map, segment_map_w, segment_map_h);
5883
5884 aom_free(segment_map);
5885
5886 gm_info->search_done = 1;
5887 for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
5888 aom_free(params_by_motion[m].inliers);
5889 }
5890 }
5891 memcpy(cm->cur_frame->global_motion, cm->global_motion,
5892 REF_FRAMES * sizeof(WarpedMotionParams));
5893 #if CONFIG_COLLECT_COMPONENT_TIMING
5894 end_timing(cpi, av1_compute_global_motion_time);
5895 #endif
5896
5897 #if CONFIG_COLLECT_COMPONENT_TIMING
5898 start_timing(cpi, av1_setup_motion_field_time);
5899 #endif
5900 if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
5901 #if CONFIG_COLLECT_COMPONENT_TIMING
5902 end_timing(cpi, av1_setup_motion_field_time);
5903 #endif
5904
5905 cm->current_frame.skip_mode_info.skip_mode_flag =
5906 check_skip_mode_enabled(cpi);
5907
5908 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy;
5909 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy;
5910 cpi->row_mt = 0;
5911
5912 if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
5913 cpi->row_mt = 1;
5914 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
5915 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
5916 av1_encode_tiles_row_mt(cpi);
5917 } else {
5918 if (AOMMIN(cpi->oxcf.max_threads, cm->tiles.cols * cm->tiles.rows) > 1)
5919 av1_encode_tiles_mt(cpi);
5920 else
5921 encode_tiles(cpi);
5922 }
5923
5924 // If intrabc is allowed but never selected, reset the allow_intrabc flag.
5925 if (features->allow_intrabc && !cpi->intrabc_used) {
5926 features->allow_intrabc = 0;
5927 }
5928 if (features->allow_intrabc) {
5929 cm->delta_q_info.delta_lf_present_flag = 0;
5930 }
5931
5932 if (cm->delta_q_info.delta_q_present_flag && cpi->deltaq_used == 0) {
5933 cm->delta_q_info.delta_q_present_flag = 0;
5934 }
5935
5936 // Set the transform size appropriately before bitstream creation
5937 const MODE_EVAL_TYPE eval_type =
5938 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch
5939 ? WINNER_MODE_EVAL
5940 : DEFAULT_EVAL;
5941 const TX_SIZE_SEARCH_METHOD tx_search_type =
5942 cpi->winner_mode_params.tx_size_search_methods[eval_type];
5943 assert(cpi->oxcf.enable_tx64 || tx_search_type != USE_LARGESTALL);
5944 features->tx_mode = select_tx_mode(cm, tx_search_type);
5945
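  // Frame-level tx_type probabilities are updated with a damped average: the
  // previous probability and the frequency observed in this frame (each
  // scaled so a context sums to 1024) are averaged, and the rounding residue
  // accumulated in `left` is folded into index 0 so the total stays exactly
  // 1024. For example, a prior of 512 combined with an observed frequency of
  // 256/1024 becomes (512 + 256) >> 1 = 384.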
5946 if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
5947 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5948
5949 for (i = 0; i < TX_SIZES_ALL; i++) {
5950 int sum = 0;
5951 int j;
5952 int left = 1024;
5953
5954 for (j = 0; j < TX_TYPES; j++)
5955 sum += cpi->td.rd_counts.tx_type_used[i][j];
5956
5957 for (j = TX_TYPES - 1; j >= 0; j--) {
5958 const int new_prob =
5959 sum ? 1024 * cpi->td.rd_counts.tx_type_used[i][j] / sum
5960 : (j ? 0 : 1024);
5961 int prob =
5962 (frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
5963 left -= prob;
5964 if (j == 0) prob += left;
5965 frame_probs->tx_type_probs[update_type][i][j] = prob;
5966 }
5967 }
5968 }
5969
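  // OBMC usage gets the same damped-average update at 7-bit scale (128); as a
  // binary event it needs no renormalization pass.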
5970 if (!cpi->sf.inter_sf.disable_obmc &&
5971 cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) {
5972 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5973
5974 for (i = 0; i < BLOCK_SIZES_ALL; i++) {
5975 int sum = 0;
5976 for (int j = 0; j < 2; j++) sum += cpi->td.rd_counts.obmc_used[i][j];
5977
5978 const int new_prob =
5979 sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
5980 frame_probs->obmc_probs[update_type][i] =
5981 (frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
5982 }
5983 }
5984
5985 if (features->allow_warped_motion &&
5986 cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
5987 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5988 int sum = 0;
5989 for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
5990 const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
5991 frame_probs->warped_probs[update_type] =
5992 (frame_probs->warped_probs[update_type] + new_prob) >> 1;
5993 }
5994
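  // Switchable interpolation filter probabilities follow the same scheme at a
  // scale of 1536, again folding the rounding residue into index 0.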
5995 if (cm->current_frame.frame_type != KEY_FRAME &&
5996 cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
5997 features->interp_filter == SWITCHABLE) {
5998 const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
5999
6000 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
6001 int sum = 0;
6002 int j;
6003 int left = 1536;
6004
6005 for (j = 0; j < SWITCHABLE_FILTERS; j++) {
6006 sum += cpi->td.counts->switchable_interp[i][j];
6007 }
6008
6009 for (j = SWITCHABLE_FILTERS - 1; j >= 0; j--) {
6010 const int new_prob =
6011 sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
6012 : (j ? 0 : 1536);
6013 int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
6014 new_prob) >>
6015 1;
6016 left -= prob;
6017 if (j == 0) prob += left;
6018 frame_probs->switchable_interp_probs[update_type][i][j] = prob;
6019 }
6020 }
6021 }
6022
6023 if ((!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi) &&
6024 !cpi->sf.rt_sf.use_nonrd_pick_mode) ||
6025 hash_table_created) {
6026 av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
6027 }
6028 }
6029
6030 void av1_encode_frame(AV1_COMP *cpi) {
6031 AV1_COMMON *const cm = &cpi->common;
6032 CurrentFrame *const current_frame = &cm->current_frame;
6033 FeatureFlags *const features = &cm->features;
6034 const int num_planes = av1_num_planes(cm);
6035 // Indicates whether or not to use a default reduced set for ext-tx
6036 // rather than the potential full set of 16 transforms
6037 features->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;
6038
6039 // Make sure segment_id is no larger than last_active_segid.
6040 if (cm->seg.enabled && cm->seg.update_map) {
6041 const int mi_rows = cm->mi_params.mi_rows;
6042 const int mi_cols = cm->mi_params.mi_cols;
6043 const int last_active_segid = cm->seg.last_active_segid;
6044 uint8_t *map = cpi->enc_seg.map;
6045 for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
6046 for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
6047 map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
6048 }
6049 map += mi_cols;
6050 }
6051 }
6052
6053 av1_setup_frame_buf_refs(cm);
6054 enforce_max_ref_frames(cpi, &cpi->ref_frame_flags);
6055 set_rel_frame_dist(cpi);
6056 av1_setup_frame_sign_bias(cm);
6057
6058 #if CHECK_PRECOMPUTED_REF_FRAME_MAP
6059 GF_GROUP *gf_group = &cpi->gf_group;
6060   // TODO(yuec): The check is disabled on OVERLAY frames for now, because info
6061   // in cpi->gf_group has been refreshed for the next GOP when the check is
6062   // performed for OVERLAY frames. Since we do not yet support inter-GOP ref
6063   // frame map computation, the precomputed ref map for an OVERLAY frame is all
6064   // -1 at this point (although it is meaningful before gf_group is refreshed).
6065 if (!frame_is_intra_only(cm) && gf_group->index != 0) {
6066 const RefCntBuffer *const golden_buf = get_ref_frame_buf(cm, GOLDEN_FRAME);
6067
6068 if (golden_buf) {
6069 const int golden_order_hint = golden_buf->order_hint;
6070
6071 for (int ref = LAST_FRAME; ref < EXTREF_FRAME; ++ref) {
6072 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
6073 const int ref_disp_idx_precomputed =
6074 gf_group->ref_frame_disp_idx[gf_group->index][ref - LAST_FRAME];
6075
6076 (void)ref_disp_idx_precomputed;
6077
6078 if (buf != NULL) {
6079 const int ref_disp_idx =
6080 get_relative_dist(&cm->seq_params.order_hint_info,
6081 buf->order_hint, golden_order_hint);
6082
6083 if (ref_disp_idx >= 0)
6084 assert(ref_disp_idx == ref_disp_idx_precomputed);
6085 else
6086 assert(ref_disp_idx_precomputed == -1);
6087 } else {
6088 assert(ref_disp_idx_precomputed == -1);
6089 }
6090 }
6091 }
6092 }
6093 #endif
6094
6095 #if CONFIG_MISMATCH_DEBUG
6096 mismatch_reset_frame(num_planes);
6097 #else
6098 (void)num_planes;
6099 #endif
6100
6101 if (cpi->sf.hl_sf.frame_parameter_update) {
6102 RD_COUNTS *const rdc = &cpi->td.rd_counts;
6103
6104 if (frame_is_intra_only(cm))
6105 current_frame->reference_mode = SINGLE_REFERENCE;
6106 else
6107 current_frame->reference_mode = REFERENCE_MODE_SELECT;
6108
6109 features->interp_filter = SWITCHABLE;
6110 if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
6111
6112 features->switchable_motion_mode = 1;
6113
6114 rdc->compound_ref_used_flag = 0;
6115 rdc->skip_mode_used_flag = 0;
6116
6117 encode_frame_internal(cpi);
6118
6119 if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
6120 // Use a flag that includes 4x4 blocks
6121 if (rdc->compound_ref_used_flag == 0) {
6122 current_frame->reference_mode = SINGLE_REFERENCE;
6123 #if CONFIG_ENTROPY_STATS
6124 av1_zero(cpi->td.counts->comp_inter);
6125 #endif // CONFIG_ENTROPY_STATS
6126 }
6127 }
6128     // Re-check the skip mode status, as the reference mode may have been
6129     // changed.
6130     SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
6131 if (frame_is_intra_only(cm) ||
6132 current_frame->reference_mode == SINGLE_REFERENCE) {
6133 skip_mode_info->skip_mode_allowed = 0;
6134 skip_mode_info->skip_mode_flag = 0;
6135 }
6136 if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
6137 skip_mode_info->skip_mode_flag = 0;
6138
6139 if (!cm->tiles.large_scale) {
6140 if (features->tx_mode == TX_MODE_SELECT &&
6141 cpi->td.mb.txb_split_count == 0)
6142 features->tx_mode = TX_MODE_LARGEST;
6143 }
6144 } else {
6145 encode_frame_internal(cpi);
6146 }
6147 }
6148
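// Walks the variable tx-size partition tree of an inter block, mirroring the
// decoder-side signaling: at each node it records (and optionally updates the
// CDF for) whether the transform unit is kept whole or split further.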
6149 static AOM_INLINE void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
6150 FRAME_COUNTS *counts, TX_SIZE tx_size,
6151 int depth, int blk_row, int blk_col,
6152 uint8_t allow_update_cdf) {
6153 MB_MODE_INFO *mbmi = xd->mi[0];
6154 const BLOCK_SIZE bsize = mbmi->sb_type;
6155 const int max_blocks_high = max_block_high(xd, bsize, 0);
6156 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
6157 int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
6158 xd->left_txfm_context + blk_row,
6159 mbmi->sb_type, tx_size);
6160 const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
6161 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
6162
6163 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
6164 assert(tx_size > TX_4X4);
6165
6166 if (depth == MAX_VARTX_DEPTH) {
6167 // Don't add to counts in this case
6168 mbmi->tx_size = tx_size;
6169 txfm_partition_update(xd->above_txfm_context + blk_col,
6170 xd->left_txfm_context + blk_row, tx_size, tx_size);
6171 return;
6172 }
6173
6174 if (tx_size == plane_tx_size) {
6175 #if CONFIG_ENTROPY_STATS
6176 ++counts->txfm_partition[ctx][0];
6177 #endif
6178 if (allow_update_cdf)
6179 update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
6180 mbmi->tx_size = tx_size;
6181 txfm_partition_update(xd->above_txfm_context + blk_col,
6182 xd->left_txfm_context + blk_row, tx_size, tx_size);
6183 } else {
6184 const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
6185 const int bsw = tx_size_wide_unit[sub_txs];
6186 const int bsh = tx_size_high_unit[sub_txs];
6187
6188 #if CONFIG_ENTROPY_STATS
6189 ++counts->txfm_partition[ctx][1];
6190 #endif
6191 if (allow_update_cdf)
6192 update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
6193 ++x->txb_split_count;
6194
6195 if (sub_txs == TX_4X4) {
6196 mbmi->inter_tx_size[txb_size_index] = TX_4X4;
6197 mbmi->tx_size = TX_4X4;
6198 txfm_partition_update(xd->above_txfm_context + blk_col,
6199 xd->left_txfm_context + blk_row, TX_4X4, tx_size);
6200 return;
6201 }
6202
6203 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
6204 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
6205 int offsetr = row;
6206 int offsetc = col;
6207
6208 update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr,
6209 blk_col + offsetc, allow_update_cdf);
6210 }
6211 }
6212 }
6213 }
6214
6215 static AOM_INLINE void tx_partition_count_update(const AV1_COMMON *const cm,
6216 MACROBLOCK *x,
6217 BLOCK_SIZE plane_bsize,
6218 FRAME_COUNTS *td_counts,
6219 uint8_t allow_update_cdf) {
6220 MACROBLOCKD *xd = &x->e_mbd;
6221 const int mi_width = mi_size_wide[plane_bsize];
6222 const int mi_height = mi_size_high[plane_bsize];
6223 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
6224 const int bh = tx_size_high_unit[max_tx_size];
6225 const int bw = tx_size_wide_unit[max_tx_size];
6226
6227 xd->above_txfm_context =
6228 cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
6229 xd->left_txfm_context =
6230 xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
6231
6232 for (int idy = 0; idy < mi_height; idy += bh) {
6233 for (int idx = 0; idx < mi_width; idx += bw) {
6234 update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
6235 allow_update_cdf);
6236 }
6237 }
6238 }
6239
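// Same traversal as update_txfm_count(), but only refreshes the above/left
// txfm partition contexts without updating any counts or CDFs.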
6240 static AOM_INLINE void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size,
6241 int blk_row, int blk_col) {
6242 MB_MODE_INFO *mbmi = xd->mi[0];
6243 const BLOCK_SIZE bsize = mbmi->sb_type;
6244 const int max_blocks_high = max_block_high(xd, bsize, 0);
6245 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
6246 const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
6247 const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
6248
6249 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
6250
6251 if (tx_size == plane_tx_size) {
6252 mbmi->tx_size = tx_size;
6253 txfm_partition_update(xd->above_txfm_context + blk_col,
6254 xd->left_txfm_context + blk_row, tx_size, tx_size);
6255
6256 } else {
6257 if (tx_size == TX_8X8) {
6258 mbmi->inter_tx_size[txb_size_index] = TX_4X4;
6259 mbmi->tx_size = TX_4X4;
6260 txfm_partition_update(xd->above_txfm_context + blk_col,
6261 xd->left_txfm_context + blk_row, TX_4X4, tx_size);
6262 return;
6263 }
6264 const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
6265 const int bsw = tx_size_wide_unit[sub_txs];
6266 const int bsh = tx_size_high_unit[sub_txs];
6267 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
6268 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
6269 const int offsetr = blk_row + row;
6270 const int offsetc = blk_col + col;
6271 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
6272 set_txfm_context(xd, sub_txs, offsetr, offsetc);
6273 }
6274 }
6275 }
6276 }
6277
6278 static AOM_INLINE void tx_partition_set_contexts(const AV1_COMMON *const cm,
6279 MACROBLOCKD *xd,
6280 BLOCK_SIZE plane_bsize) {
6281 const int mi_width = mi_size_wide[plane_bsize];
6282 const int mi_height = mi_size_high[plane_bsize];
6283 const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
6284 const int bh = tx_size_high_unit[max_tx_size];
6285 const int bw = tx_size_wide_unit[max_tx_size];
6286
6287 xd->above_txfm_context =
6288 cm->above_contexts.txfm[xd->tile.tile_row] + xd->mi_col;
6289 xd->left_txfm_context =
6290 xd->left_txfm_context_buffer + (xd->mi_row & MAX_MIB_MASK);
6291
6292 for (int idy = 0; idy < mi_height; idy += bh) {
6293 for (int idx = 0; idx < mi_width; idx += bw) {
6294 set_txfm_context(xd, max_tx_size, idy, idx);
6295 }
6296 }
6297 }
6298
6299 static AOM_INLINE void encode_superblock(const AV1_COMP *const cpi,
6300 TileDataEnc *tile_data, ThreadData *td,
6301 TOKENEXTRA **t, RUN_TYPE dry_run,
6302 BLOCK_SIZE bsize, int *rate) {
6303 const AV1_COMMON *const cm = &cpi->common;
6304 const int num_planes = av1_num_planes(cm);
6305 MACROBLOCK *const x = &td->mb;
6306 MACROBLOCKD *const xd = &x->e_mbd;
6307 MB_MODE_INFO **mi_4x4 = xd->mi;
6308 MB_MODE_INFO *mbmi = mi_4x4[0];
6309 const int seg_skip =
6310 segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
6311 const int mis = cm->mi_params.mi_stride;
6312 const int mi_width = mi_size_wide[bsize];
6313 const int mi_height = mi_size_high[bsize];
6314 const int is_inter = is_inter_block(mbmi);
6315
6316 // Initialize tx_mode and tx_size_search_method
6317 set_tx_size_search_method(
6318 cm, &cpi->winner_mode_params, x,
6319 cpi->sf.winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1);
6320
6321 const int mi_row = xd->mi_row;
6322 const int mi_col = xd->mi_col;
6323 if (!is_inter) {
6324 xd->cfl.store_y = store_cfl_required(cm, xd);
6325 mbmi->skip = 1;
6326 for (int plane = 0; plane < num_planes; ++plane) {
6327 av1_encode_intra_block_plane(cpi, x, bsize, plane, dry_run,
6328 cpi->optimize_seg_arr[mbmi->segment_id]);
6329 }
6330
6331     // If there is at least one lossless segment, force skip to 0 for intra
6332     // blocks so that the segment_id is not changed by
6333     // write_segment_id().
6334 if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
6335 cpi->enc_seg.has_lossless_segment)
6336 mbmi->skip = 0;
6337
6338 xd->cfl.store_y = 0;
6339 if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
6340 for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
6341 if (mbmi->palette_mode_info.palette_size[plane] > 0) {
6342 if (!dry_run) {
6343 av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
6344 PALETTE_MAP, tile_data->allow_update_cdf,
6345 td->counts);
6346 } else if (dry_run == DRY_RUN_COSTCOEFFS) {
6347             *rate +=
6348 av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
6349 }
6350 }
6351 }
6352 }
6353
6354 av1_update_txb_context(cpi, td, dry_run, bsize,
6355 tile_data->allow_update_cdf);
6356 } else {
6357 int ref;
6358 const int is_compound = has_second_ref(mbmi);
6359
6360 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
6361 for (ref = 0; ref < 1 + is_compound; ++ref) {
6362 const YV12_BUFFER_CONFIG *cfg =
6363 get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
6364 assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
6365 av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
6366 xd->block_ref_scale_factors[ref], num_planes);
6367 }
6368 int start_plane = (cpi->sf.rt_sf.reuse_inter_pred_nonrd) ? 1 : 0;
6369 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
6370 start_plane, av1_num_planes(cm) - 1);
6371 if (mbmi->motion_mode == OBMC_CAUSAL) {
6372 assert(cpi->oxcf.enable_obmc == 1);
6373 av1_build_obmc_inter_predictors_sb(cm, xd);
6374 }
6375
6376 #if CONFIG_MISMATCH_DEBUG
6377 if (dry_run == OUTPUT_ENABLED) {
6378 for (int plane = 0; plane < num_planes; ++plane) {
6379 const struct macroblockd_plane *pd = &xd->plane[plane];
6380 int pixel_c, pixel_r;
6381 mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
6382 pd->subsampling_x, pd->subsampling_y);
6383 if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
6384 pd->subsampling_y))
6385 continue;
6386 mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
6387 cm->current_frame.order_hint, plane, pixel_c,
6388 pixel_r, pd->width, pd->height,
6389 xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
6390 }
6391 }
6392 #else
6393 (void)num_planes;
6394 #endif
6395
6396 av1_encode_sb(cpi, x, bsize, dry_run);
6397 av1_tokenize_sb_vartx(cpi, td, dry_run, bsize, rate,
6398 tile_data->allow_update_cdf);
6399 }
6400
6401 if (!dry_run) {
6402 if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
6403 if (x->tx_mode_search_type == TX_MODE_SELECT &&
6404 !xd->lossless[mbmi->segment_id] && mbmi->sb_type > BLOCK_4X4 &&
6405 !(is_inter && (mbmi->skip || seg_skip))) {
6406 if (is_inter) {
6407 tx_partition_count_update(cm, x, bsize, td->counts,
6408 tile_data->allow_update_cdf);
6409 } else {
6410 if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
6411 ++x->txb_split_count;
6412 if (block_signals_txsize(bsize)) {
6413 const int tx_size_ctx = get_tx_size_context(xd);
6414 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
6415 const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
6416 const int max_depths = bsize_to_max_depth(bsize);
6417
6418 if (tile_data->allow_update_cdf)
6419 update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
6420 depth, max_depths + 1);
6421 #if CONFIG_ENTROPY_STATS
6422 ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
6423 #endif
6424 }
6425 }
6426 assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
6427 } else {
6428 int i, j;
6429 TX_SIZE intra_tx_size;
6430 // The new intra coding scheme requires no change of transform size
6431 if (is_inter) {
6432 if (xd->lossless[mbmi->segment_id]) {
6433 intra_tx_size = TX_4X4;
6434 } else {
6435 intra_tx_size = tx_size_from_tx_mode(bsize, x->tx_mode_search_type);
6436 }
6437 } else {
6438 intra_tx_size = mbmi->tx_size;
6439 }
6440
6441 for (j = 0; j < mi_height; j++)
6442 for (i = 0; i < mi_width; i++)
6443 if (mi_col + i < cm->mi_params.mi_cols &&
6444 mi_row + j < cm->mi_params.mi_rows)
6445 mi_4x4[mis * j + i]->tx_size = intra_tx_size;
6446
6447 if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
6448 }
6449 }
6450
6451 if (x->tx_mode_search_type == TX_MODE_SELECT &&
6452 block_signals_txsize(mbmi->sb_type) && is_inter &&
6453 !(mbmi->skip || seg_skip) && !xd->lossless[mbmi->segment_id]) {
6454 if (dry_run) tx_partition_set_contexts(cm, xd, bsize);
6455 } else {
6456 TX_SIZE tx_size = mbmi->tx_size;
6457 // The new intra coding scheme requires no change of transform size
6458 if (is_inter) {
6459 if (xd->lossless[mbmi->segment_id]) {
6460 tx_size = TX_4X4;
6461 } else {
6462 tx_size = tx_size_from_tx_mode(bsize, x->tx_mode_search_type);
6463 }
6464 } else {
6465 tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
6466 }
6467 mbmi->tx_size = tx_size;
6468 set_txfm_ctxs(tx_size, xd->width, xd->height,
6469 (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
6470 }
6471
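  // If this inter block carries no chroma (sub-8x8 cases), store its
  // reconstructed luma so that the block that does carry chroma can still
  // derive CfL parameters over the full luma area.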
6472 if (is_inter_block(mbmi) && !xd->is_chroma_ref && is_cfl_allowed(xd)) {
6473 cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
6474 }
6475 }
6476