/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <limits.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
#include "aom_ports/mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "aom_util/debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mv.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconintra.h"
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
#include "av1/common/warped_motion.h"

#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/partition_model_weights.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/var_based_part.h"

static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int *rate);
static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                               const MACROBLOCK *const x,
                               const RD_STATS *const rd_stats,
                               unsigned int pb_source_variance);

// This is used as a reference when computing the source variance for the
// purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};

unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int sse;
  const unsigned int var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

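// High bit-depth counterpart of av1_get_sby_perpixel_variance(). The flat
// reference is scaled so that mid-grey stays at 128 << (bd - 8): 128 for
// 8-bit, 128 * 4 for 10-bit and 128 * 16 for 12-bit input.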
unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  unsigned int var, sse;
  switch (bd) {
    case 10:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), 0, &sse);
      break;
    case 12:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), 0, &sse);
      break;
    case 8:
    default:
      var =
          cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                             CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), 0, &sse);
      break;
  }
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

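// Per-pixel variance of the difference between the source block and the
// co-located block in the LAST_FRAME reconstruction.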
static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  unsigned int sse, var;
  uint8_t *last_y;
  const YV12_BUFFER_CONFIG *last =
      get_ref_frame_yv12_buf(&cpi->common, LAST_FRAME);

  assert(last != NULL);
  last_y =
      &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}

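// Maps the source/LAST_FRAME diff variance of a 64x64 block to a fixed
// partition size: the quieter the block, the larger the partition
// (var < 8 -> 64x64, < 128 -> 32x32, < 2048 -> 16x16, otherwise 8x8).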
static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
                                                   int mi_row, int mi_col) {
  unsigned int var = get_sby_perpixel_diff_variance(
      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  if (var < 8)
    return BLOCK_64X64;
  else if (var < 128)
    return BLOCK_32X32;
  else if (var < 2048)
    return BLOCK_16X16;
  else
    return BLOCK_8X8;
}

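// Sets up the per-block coding state at (mi_row, mi_col): mode-info offsets,
// skip/txfm contexts, destination and source plane pointers, MV limits and
// rdmult -- everything set_offsets() does except choosing a segment ID.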
static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x, int mi_row,
                                           int mi_col, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);

  set_skip_context(xd, mi_row, mi_col, num_planes);
  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  // Set up destination pointers.
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
                       num_planes);

  // Set up limit values for MV components.
  // MVs beyond this range do not produce new/different prediction blocks.
  x->mv_limits.row_min =
      -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
  x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;

  set_plane_n4(xd, mi_width, mi_height, num_planes);

  // Set up the distance of the MB to the edge of the frame in 1/8th pel units.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);

  // R/D setup.
  x->rdmult = cpi->rd.RDMULT;

  // Required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs().
  xd->tile = *tile;

  xd->cfl.mi_row = mi_row;
  xd->cfl.mi_col = mi_col;
}

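// As set_offsets_without_segment_id(), plus segment-ID lookup and the
// matching per-plane quantizer setup.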
static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;

  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);

  // Set up the segment ID.
  mbmi = xd->mi[0];
  mbmi->segment_id = 0;
  if (seg->enabled) {
    if (!cpi->vaq_refresh) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mbmi->segment_id =
          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
    }
    av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
  }
}

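// Records the switchable interpolation filter chosen in each direction
// (row/col) in the frame counts and, optionally, in the tile's CDFs.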
static void update_filter_type_count(uint8_t allow_update_cdf,
                                     FRAME_COUNTS *counts,
                                     const MACROBLOCKD *xd,
                                     const MB_MODE_INFO *mbmi) {
  int dir;
  for (dir = 0; dir < 2; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
    ++counts->switchable_interp[ctx][filter];
    if (allow_update_cdf) {
      update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter,
                 SWITCHABLE_FILTERS);
    }
  }
}

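// For GLOBALMV / GLOBAL_GLOBALMV blocks, credit each reference frame with
// the number of 4x4 units the block covers.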
static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
                                      const MB_MODE_INFO *mbmi,
                                      RD_COUNTS *rdc) {
  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
    int ref;
    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
    }
  }
}

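// Resets the block's transform size to one consistent with tx_mode (TX_4X4
// when lossless) and clears the per-block tx-type and skip state.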
static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
                          const TX_MODE tx_mode) {
  MACROBLOCKD *const xd = &x->e_mbd;
  if (xd->lossless[mbmi->segment_id]) {
    mbmi->tx_size = TX_4X4;
  } else if (tx_mode != TX_MODE_SELECT) {
    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
  } else {
    BLOCK_SIZE bsize = mbmi->sb_type;
    TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize);
    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
  }
  if (is_inter_block(mbmi)) {
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
  }
  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);
  av1_zero(x->blk_skip);
  x->skip = 0;
}

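// Copies the mode decision cached in ctx back into the live coding state
// (the xd->mi grid, coefficient buffers and segment ID) and updates the
// frame-level RD counts.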
static void update_state(const AV1_COMP *const cpi,
                         const TileDataEnc *const tile_data, ThreadData *td,
                         const PICK_MODE_CONTEXT *const ctx, int mi_row,
                         int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
  int i, x_idx, y;
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const MB_MODE_INFO *const mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = mi_size_wide[mi->sb_type];
  const int bh = mi_size_high[mi->sb_type];
  const int mis = cm->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  assert(mi->sb_type == bsize);

  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

  x->skip = ctx->skip;

  // If segmentation is in use:
  if (seg->enabled) {
    // For in-frame complexity AQ, copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
      reset_tx_size(x, mi_addr, cm->tx_mode);
    }
    // Otherwise, for cyclic refresh mode, update the segment map, set the
    // segment id and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip);
    }
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;
  }

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
  // Restore the coding context of the MB to that which was in place
  // when the mode was picked for it.
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }

  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);

  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
    if (frame_is_intra_only(cm)) {
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH /*SMOOTH_PRED*/,
        THR_SMOOTH_V /*SMOOTH_V_PRED*/,
        THR_SMOOTH_H /*SMOOTH_H_PRED*/,
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode is chosen as best.
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mi_addr)) {
      // TODO(sarahparker): global motion stats need to be handled per-tile
      // to be compatible with tile-based threading.
      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
    }

    if (cm->interp_filter == SWITCHABLE &&
        mi_addr->motion_mode != WARPED_CAUSAL &&
        !is_nontrans_global_motion(xd, xd->mi[0])) {
      update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd,
                               mi_addr);
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
  }

  const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
  av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}

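// Points x->plane[].src at the part of the source frame covered by the
// block at (mi_row, mi_col).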
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) {
    const int is_uv = i > 0;
    setup_pred_plane(
        &x->plane[i].src, bsize, src->buffers[i], src->crop_widths[is_uv],
        src->crop_heights[is_uv], src->strides[is_uv], mi_row, mi_col, NULL,
        x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y);
  }
}

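// Recomputes rdmult from the segment's effective qindex; used by the
// variance and complexity AQ paths in pick_sb_modes().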
static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  const AV1_COMMON *const cm = &cpi->common;
  av1_init_plane_quantizers(cpi, x, segment_id);
  aom_clear_system_state();
  int segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}

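// Recomputes rdmult from the superblock-level delta-q adjusted qindex.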
static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
  const AV1_COMMON *const cm = &cpi->common;

  return av1_compute_rd_mult(
      cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
}

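// Thin wrapper around av1_edge_exists() that reports a zero-magnitude edge
// for block widths the edge detector cannot handle.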
static EdgeInfo edge_info(const struct buf_2d *ref, const BLOCK_SIZE bsize,
                          const bool high_bd, const int bd) {
  const int width = block_size_wide[bsize];
  const int height = block_size_high[bsize];
  // Implementation requires width to be a multiple of 8. It also requires
  // height to be a multiple of 4, but this is always the case.
  assert(height % 4 == 0);
  if (width % 8 != 0) {
    EdgeInfo ei = { .magnitude = 0, .x = 0, .y = 0 };
    return ei;
  }
  return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
}

static int use_pb_simple_motion_pred_sse(const AV1_COMP *const cpi) {
  // TODO(debargha, yuec): Not in use, need to implement a speed feature
  // utilizing this data point, and replace '0' by the corresponding speed
  // feature flag.
  return 0 && !frame_is_intra_only(&cpi->common);
}

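// Runs mode selection for one block: sets up offsets and contexts, adjusts
// rdmult for the active AQ/delta-q mode, then dispatches to the intra,
// inter or non-RD mode pickers. Results land in rd_cost and are cached in
// ctx.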
static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
                          MACROBLOCK *const x, int mi_row, int mi_col,
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                          int64_t best_rd, int use_nonrd_pick_mode) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  MB_MODE_INFO *ctx_mbmi = &ctx->mic;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
  int i, orig_rdmult;

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  if (best_rd < 0) {
    ctx->rdcost = INT64_MAX;
    ctx->skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

  aom_clear_system_state();

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  mbmi = xd->mi[0];

  if (ctx->rd_mode_is_ready) {
    assert(ctx_mbmi->sb_type == bsize);
    assert(ctx_mbmi->partition == partition);
    *mbmi = *ctx_mbmi;
    rd_cost->rate = ctx->rate;
    rd_cost->dist = ctx->dist;
    rd_cost->rdcost = ctx->rdcost;
  } else {
    mbmi->sb_type = bsize;
    mbmi->partition = partition;
  }

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  if (!ctx->rd_mode_is_ready) {
    ctx->skippable = 0;

    // Set to zero to make sure we do not use the previously encoded frame's
    // stats.
    mbmi->skip = 0;

    // Reset the skip mode flag.
    mbmi->skip_mode = 0;
  }

  x->skip_chroma_rd =
      !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
                           xd->plane[1].subsampling_y);

  if (ctx->rd_mode_is_ready) {
    x->skip = ctx->skip;
    *x->mbmi_ext = ctx->mbmi_ext;
    return;
  }

  if (is_cur_buf_hbd(xd)) {
    x->source_variance = av1_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  if (use_pb_simple_motion_pred_sse(cpi)) {
    const MV ref_mv_full = { .row = 0, .col = 0 };
    unsigned int var = 0;
    av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0,
                              &x->simple_motion_pred_sse, &var);
  }

  // If the threshold for disabling wedge search is zero, it means the feature
  // should not be used. Use a value that will always succeed in the check.
  if (cpi->sf.disable_wedge_search_edge_thresh == 0) {
    x->edge_strength = UINT16_MAX;
    x->edge_strength_x = UINT16_MAX;
    x->edge_strength_y = UINT16_MAX;
  } else {
    EdgeInfo ei =
        edge_info(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
    x->edge_strength = ei.magnitude;
    x->edge_strength_x = ei.x;
    x->edge_strength_y = ei.y;
  }
  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if (aq_mode == VARIANCE_AQ) {
    if (cpi->vaq_refresh) {
      const int energy = bsize <= BLOCK_16X16
                             ? x->mb_energy
                             : av1_log_block_var(cpi, x, bsize);
      mbmi->segment_id = energy;
    }
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == COMPLEXITY_AQ) {
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    // If the segment is boosted, use the rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
      x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  } else if (cpi->oxcf.enable_tpl_model) {
    x->rdmult = x->cb_rdmult;
  }

  if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd);

  // Find the best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB.
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
                              best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd);
    } else {
      // TODO(kyslov): do the same for pick_intra_mode and
      // pick_inter_mode_sb_seg_skip
      if (use_nonrd_pick_mode) {
        av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                     bsize, ctx, best_rd);
      } else {
        av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
      }
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and, for AQ mode 2, make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->current_frame.frame_type == KEY_FRAME ||
       cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handling.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
  ctx->rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}

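// Updates the NEWMV/GLOBALMV/REFMV counts and CDFs for a single-reference
// inter block, walking the same decision tree the bitstream codes.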
static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
                                    PREDICTION_MODE mode, int16_t mode_context,
                                    uint8_t allow_update_cdf) {
  (void)counts;

  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][0];
#endif
    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 0, 2);
    return;
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->newmv_mode[mode_ctx][1];
#endif
    if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 1, 2);

    mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
    if (mode == GLOBALMV) {
#if CONFIG_ENTROPY_STATS
      ++counts->zeromv_mode[mode_ctx][0];
#endif
      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2);
      return;
    } else {
#if CONFIG_ENTROPY_STATS
      ++counts->zeromv_mode[mode_ctx][1];
#endif
      if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2);
      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
      ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];
#endif
      if (allow_update_cdf)
        update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2);
    }
  }
}

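// Updates the palette counts/CDFs: whether Y (under DC_PRED) and UV (under
// UV_DC_PRED) use a palette, and if so, the palette sizes.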
static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                               FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize);

  (void)counts;

  if (mbmi->mode == DC_PRED) {
    const int n = pmi->palette_size[0];
    const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0];
#endif
    if (allow_update_cdf)
      update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx],
                 n > 0, 2);
    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      if (allow_update_cdf) {
        update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx],
                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }

  if (mbmi->uv_mode == UV_DC_PRED) {
    const int n = pmi->palette_size[1];
    const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0];
#endif
    if (allow_update_cdf)
      update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2);

    if (n > 0) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE];
#endif
      if (allow_update_cdf) {
        update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx],
                   n - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }
}

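// Accumulates entropy stats and CDF updates for an intra block: luma mode,
// filter-intra, angle deltas, chroma mode, CfL signs/indices and palette.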
static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
                            MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                            const MB_MODE_INFO *above_mi,
                            const MB_MODE_INFO *left_mi, const int intraonly,
                            const int mi_row, const int mi_col,
                            uint8_t allow_update_cdf) {
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const PREDICTION_MODE y_mode = mbmi->mode;
  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
  (void)counts;
  const BLOCK_SIZE bsize = mbmi->sb_type;

  if (intraonly) {
#if CONFIG_ENTROPY_STATS
    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
    const int above_ctx = intra_mode_context[above];
    const int left_ctx = intra_mode_context[left];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf)
      update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
  } else {
#if CONFIG_ENTROPY_STATS
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf)
      update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
  }

  if (av1_filter_intra_allowed(cm, mbmi)) {
    const int use_filter_intra_mode =
        mbmi->filter_intra_mode_info.use_filter_intra;
#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode];
    if (use_filter_intra_mode) {
      ++counts
            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
    }
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf) {
      update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode,
                 2);
      if (use_filter_intra_mode) {
        update_cdf(fc->filter_intra_mode_cdf,
                   mbmi->filter_intra_mode_info.filter_intra_mode,
                   FILTER_INTRA_MODES);
      }
    }
  }
  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[mbmi->mode - V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED],
                 mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }

  if (!is_chroma_reference(mi_row, mi_col, bsize,
                           xd->plane[AOM_PLANE_U].subsampling_x,
                           xd->plane[AOM_PLANE_U].subsampling_y))
    return;

#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  if (allow_update_cdf) {
    const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
    update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
               UV_INTRA_MODES - !cfl_allowed);
  }
  if (uv_mode == UV_CFL_PRED) {
    const int joint_sign = mbmi->cfl_alpha_signs;
    const int idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[joint_sign];
#endif
    if (allow_update_cdf)
      update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);
    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)];
#endif
      if (allow_update_cdf)
        update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE);
    }
    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];

#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)];
#endif
      if (allow_update_cdf)
        update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE);
    }
  }
  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_mode - UV_V_PRED]
                         [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED],
                 mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }
  if (av1_allow_palette(cm->allow_screen_content_tools, bsize))
    update_palette_cdf(xd, mbmi, counts, allow_update_cdf);
}

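// Per-block entropy bookkeeping once a block is finalized: skip flags,
// delta-q/delta-lf state, intra stats, reference-frame signaling,
// inter-intra, motion modes, compound types and DRL indices.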
static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data,
                         ThreadData *td, int mi_row, int mi_col) {
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const CurrentFrame *const current_frame = &cm->current_frame;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  FRAME_CONTEXT *fc = xd->tile_ctx;
  const uint8_t allow_update_cdf = tile_data->allow_update_cdf;

  // Delta quant applies to both intra and inter blocks.
  const int super_block_upper_left =
      ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
      ((mi_col & (cm->seq_params.mib_size - 1)) == 0);

  const int seg_ref_active =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);

  if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
      is_comp_ref_allowed(bsize)) {
    const int skip_mode_ctx = av1_get_skip_mode_context(xd);
#if CONFIG_ENTROPY_STATS
    td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
#endif
    if (allow_update_cdf)
      update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
  }

  if (!mbmi->skip_mode) {
    if (!seg_ref_active) {
      const int skip_ctx = av1_get_skip_context(xd);
#if CONFIG_ENTROPY_STATS
      td->counts->skip[skip_ctx][mbmi->skip]++;
#endif
      if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
    }
  }

  const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
  if (delta_q_info->delta_q_present_flag &&
      (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
      super_block_upper_left) {
#if CONFIG_ENTROPY_STATS
    const int dq =
        (mbmi->current_qindex - xd->current_qindex) / delta_q_info->delta_q_res;
    const int absdq = abs(dq);
    for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
      td->counts->delta_q[i][1]++;
    }
    if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
#endif
    xd->current_qindex = mbmi->current_qindex;
    if (delta_q_info->delta_lf_present_flag) {
      if (delta_q_info->delta_lf_multi) {
        const int frame_lf_count =
            av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
        for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
#if CONFIG_ENTROPY_STATS
          const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
                               delta_q_info->delta_lf_res;
          const int abs_delta_lf = abs(delta_lf);
          for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
            td->counts->delta_lf_multi[lf_id][i][1]++;
          }
          if (abs_delta_lf < DELTA_LF_SMALL)
            td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
#endif
          xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
        }
      } else {
#if CONFIG_ENTROPY_STATS
        const int delta_lf =
            (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
            delta_q_info->delta_lf_res;
        const int abs_delta_lf = abs(delta_lf);
        for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
          td->counts->delta_lf[i][1]++;
        }
        if (abs_delta_lf < DELTA_LF_SMALL)
          td->counts->delta_lf[abs_delta_lf][0]++;
#endif
        xd->delta_lf_from_base = mbmi->delta_lf_from_base;
      }
    }
  }

  if (!is_inter_block(mbmi)) {
    sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
                    frame_is_intra_only(cm), mi_row, mi_col,
                    tile_data->allow_update_cdf);
  }

  if (av1_allow_intrabc(cm)) {
    if (allow_update_cdf)
      update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
#if CONFIG_ENTROPY_STATS
    ++td->counts->intrabc[is_intrabc_block(mbmi)];
#endif  // CONFIG_ENTROPY_STATS
  }

  if (!frame_is_intra_only(cm)) {
    RD_COUNTS *rdc = &td->rd_counts;

    FRAME_COUNTS *const counts = td->counts;

    if (mbmi->skip_mode) {
      rdc->skip_mode_used_flag = 1;
      if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
        assert(has_second_ref(mbmi));
        rdc->compound_ref_used_flag = 1;
      }
      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      return;
    }

    const int inter_block = is_inter_block(mbmi);

    if (!seg_ref_active) {
#if CONFIG_ENTROPY_STATS
      counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
#endif
      if (allow_update_cdf) {
        update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
                   inter_block, 2);
      }
      // If the segment reference feature is enabled, we have only a single
      // reference frame allowed for the segment, so exclude it from the
      // reference frame counts used to work out probabilities.
      if (inter_block) {
        const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
        const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];

        av1_collect_neighbors_ref_counts(xd);

        if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
          if (has_second_ref(mbmi))
            // This flag is also updated for 4x4 blocks.
            rdc->compound_ref_used_flag = 1;
          if (is_comp_ref_allowed(bsize)) {
#if CONFIG_ENTROPY_STATS
            counts->comp_inter[av1_get_reference_mode_context(xd)]
                              [has_second_ref(mbmi)]++;
#endif  // CONFIG_ENTROPY_STATS
            if (allow_update_cdf) {
              update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi),
                         2);
            }
          }
        }

        if (has_second_ref(mbmi)) {
          const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
                                                        ? UNIDIR_COMP_REFERENCE
                                                        : BIDIR_COMP_REFERENCE;
          if (allow_update_cdf) {
            update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
                       COMP_REFERENCE_TYPES);
          }
#if CONFIG_ENTROPY_STATS
          counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
                               [comp_ref_type]++;
#endif  // CONFIG_ENTROPY_STATS

          if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
            const int bit = (ref0 == BWDREF_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
            counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
                                [bit]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit) {
              const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
              if (allow_update_cdf)
                update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
              counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
                                  [bit1]++;
#endif  // CONFIG_ENTROPY_STATS
              if (bit1) {
                if (allow_update_cdf) {
                  update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
                             ref1 == GOLDEN_FRAME, 2);
                }
#if CONFIG_ENTROPY_STATS
                counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
                                    [2][ref1 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
              }
            }
          } else {
            const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
            counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_ref_p1(xd),
                           ref0 == LAST2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
                              [ref0 == LAST2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            } else {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_ref_p2(xd),
                           ref0 == GOLDEN_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
                              [ref0 == GOLDEN_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
            if (allow_update_cdf) {
              update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd),
                         ref1 == ALTREF_FRAME, 2);
            }
#if CONFIG_ENTROPY_STATS
            counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
                               [ref1 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            if (ref1 != ALTREF_FRAME) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
                           ref1 == ALTREF2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
                                 [ref1 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        } else {
          const int bit = (ref0 >= BWDREF_FRAME);
          if (allow_update_cdf)
            update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
#if CONFIG_ENTROPY_STATS
          counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
#endif  // CONFIG_ENTROPY_STATS
          if (bit) {
            assert(ref0 <= ALTREF_FRAME);
            if (allow_update_cdf) {
              update_cdf(av1_get_pred_cdf_single_ref_p2(xd),
                         ref0 == ALTREF_FRAME, 2);
            }
#if CONFIG_ENTROPY_STATS
            counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 == ALTREF_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            if (ref0 != ALTREF_FRAME) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
                           ref0 == ALTREF2_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
                                [ref0 == ALTREF2_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          } else {
            const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
            if (allow_update_cdf)
              update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
#if CONFIG_ENTROPY_STATS
            counts
                ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
#endif  // CONFIG_ENTROPY_STATS
            if (!bit1) {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p4(xd),
                           ref0 != LAST_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
                                [ref0 != LAST_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            } else {
              if (allow_update_cdf) {
                update_cdf(av1_get_pred_cdf_single_ref_p5(xd),
                           ref0 != LAST3_FRAME, 2);
              }
#if CONFIG_ENTROPY_STATS
              counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
                                [ref0 != LAST3_FRAME]++;
#endif  // CONFIG_ENTROPY_STATS
            }
          }
        }

        if (cm->seq_params.enable_interintra_compound &&
            is_interintra_allowed(mbmi)) {
          const int bsize_group = size_group_lookup[bsize];
          if (mbmi->ref_frame[1] == INTRA_FRAME) {
#if CONFIG_ENTROPY_STATS
            counts->interintra[bsize_group][1]++;
#endif
            if (allow_update_cdf)
              update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
#if CONFIG_ENTROPY_STATS
            counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->interintra_mode_cdf[bsize_group],
                         mbmi->interintra_mode, INTERINTRA_MODES);
            }
            if (is_interintra_wedge_used(bsize)) {
#if CONFIG_ENTROPY_STATS
              counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
#endif
              if (allow_update_cdf) {
                update_cdf(fc->wedge_interintra_cdf[bsize],
                           mbmi->use_wedge_interintra, 2);
              }
              if (mbmi->use_wedge_interintra) {
#if CONFIG_ENTROPY_STATS
                counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
#endif
                if (allow_update_cdf) {
                  update_cdf(fc->wedge_idx_cdf[bsize],
                             mbmi->interintra_wedge_index, 16);
                }
              }
            }
          } else {
#if CONFIG_ENTROPY_STATS
            counts->interintra[bsize_group][0]++;
#endif
            if (allow_update_cdf)
              update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
          }
        }

        set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
        const MOTION_MODE motion_allowed =
            cm->switchable_motion_mode
                ? motion_mode_allowed(xd->global_motion, xd, mbmi,
                                      cm->allow_warped_motion)
                : SIMPLE_TRANSLATION;
        if (mbmi->ref_frame[1] != INTRA_FRAME) {
          if (motion_allowed == WARPED_CAUSAL) {
#if CONFIG_ENTROPY_STATS
            counts->motion_mode[bsize][mbmi->motion_mode]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
                         MOTION_MODES);
            }
          } else if (motion_allowed == OBMC_CAUSAL) {
#if CONFIG_ENTROPY_STATS
            counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL,
                         2);
            }
          }
        }

        if (has_second_ref(mbmi)) {
          assert(current_frame->reference_mode != SINGLE_REFERENCE &&
                 is_inter_compound_mode(mbmi->mode) &&
                 mbmi->motion_mode == SIMPLE_TRANSLATION);

          const int masked_compound_used =
              is_any_masked_compound_used(bsize) &&
              cm->seq_params.enable_masked_compound;
          if (masked_compound_used) {
            const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
#if CONFIG_ENTROPY_STATS
            ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
#endif
            if (allow_update_cdf) {
              update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
                         mbmi->comp_group_idx, 2);
            }
          }

          if (mbmi->comp_group_idx == 0) {
            const int comp_index_ctx = get_comp_index_context(cm, xd);
#if CONFIG_ENTROPY_STATS
            ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
#endif
            if (allow_update_cdf) {
              update_cdf(fc->compound_index_cdf[comp_index_ctx],
                         mbmi->compound_idx, 2);
            }
          } else {
            assert(masked_compound_used);
            if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
              ++counts->compound_type[bsize][mbmi->interinter_comp.type -
                                             COMPOUND_WEDGE];
#endif
              if (allow_update_cdf) {
                update_cdf(fc->compound_type_cdf[bsize],
                           mbmi->interinter_comp.type - COMPOUND_WEDGE,
                           MASKED_COMPOUND_TYPES);
              }
            }
          }
        }
        if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
#if CONFIG_ENTROPY_STATS
            counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
#endif
            if (allow_update_cdf) {
              update_cdf(fc->wedge_idx_cdf[bsize],
                         mbmi->interinter_comp.wedge_index, 16);
            }
          }
        }
      }
    }

    if (inter_block &&
        !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      int16_t mode_ctx;
      const PREDICTION_MODE mode = mbmi->mode;

      mode_ctx =
          av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
      if (has_second_ref(mbmi)) {
#if CONFIG_ENTROPY_STATS
        ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
#endif
        if (allow_update_cdf)
          update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
                     INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
      } else {
        update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf);
      }

      int mode_allowed = (mbmi->mode == NEWMV);
      mode_allowed |= (mbmi->mode == NEW_NEWMV);
      if (mode_allowed) {
        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
        int idx;

        for (idx = 0; idx < 2; ++idx) {
          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
#if CONFIG_ENTROPY_STATS
            uint8_t drl_ctx =
                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
#endif

            if (mbmi->ref_mv_idx == idx) break;
          }
        }
      }

      if (have_nearmv_in_inter_mode(mbmi->mode)) {
        uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
        int idx;

        for (idx = 1; idx < 3; ++idx) {
          if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
#if CONFIG_ENTROPY_STATS
            uint8_t drl_ctx =
                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
            ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
#endif

            if (mbmi->ref_mv_idx == idx - 1) break;
          }
        }
      }
    }
  }
}

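// Snapshot of the above/left entropy, partition and txfm contexts, used to
// save and restore state around an RD partition search.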
typedef struct {
  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
  TXFM_CONTEXT *p_ta;
  TXFM_CONTEXT *p_tl;
  TXFM_CONTEXT ta[MAX_MIB_SIZE];
  TXFM_CONTEXT tl[MAX_MIB_SIZE];
} RD_SEARCH_MACROBLOCK_CONTEXT;

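// Restores the contexts captured by save_context() below.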
static void restore_context(MACROBLOCK *x,
                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
                            int mi_col, BLOCK_SIZE bsize,
                            const int num_planes) {
  MACROBLOCKD *xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide =
      block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int num_4x4_blocks_high =
      block_size_high[bsize] >> tx_size_high_log2[0];
  int mi_width = mi_size_wide[bsize];
  int mi_height = mi_size_high[bsize];
  for (p = 0; p < num_planes; p++) {
    int tx_col = mi_col;
    int tx_row = mi_row & MAX_MIB_MASK;
    memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
           ctx->a + num_4x4_blocks_wide * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
           ctx->l + num_4x4_blocks_high * p,
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(xd->above_seg_context + mi_col, ctx->sa,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), ctx->sl,
         sizeof(xd->left_seg_context[0]) * mi_height);
  xd->above_txfm_context = ctx->p_ta;
  xd->left_txfm_context = ctx->p_tl;
  memcpy(xd->above_txfm_context, ctx->ta,
         sizeof(*xd->above_txfm_context) * mi_width);
  memcpy(xd->left_txfm_context, ctx->tl,
         sizeof(*xd->left_txfm_context) * mi_height);
}

static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         const int num_planes) {
  const MACROBLOCKD *xd = &x->e_mbd;
  int p;
  const int num_4x4_blocks_wide =
      block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int num_4x4_blocks_high =
      block_size_high[bsize] >> tx_size_high_log2[0];
  int mi_width = mi_size_wide[bsize];
  int mi_height = mi_size_high[bsize];

  // Buffer the above/left context information of the block being searched.
  for (p = 0; p < num_planes; ++p) {
    int tx_col = mi_col;
    int tx_row = mi_row & MAX_MIB_MASK;
    memcpy(ctx->a + num_4x4_blocks_wide * p,
           xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
               xd->plane[p].subsampling_x);
    memcpy(ctx->l + num_4x4_blocks_high * p,
           xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y),
           (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
               xd->plane[p].subsampling_y);
  }
  memcpy(ctx->sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_width);
  memcpy(ctx->sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK),
         sizeof(xd->left_seg_context[0]) * mi_height);
  memcpy(ctx->ta, xd->above_txfm_context,
         sizeof(*xd->above_txfm_context) * mi_width);
  memcpy(ctx->tl, xd->left_txfm_context,
         sizeof(*xd->left_txfm_context) * mi_height);
  ctx->p_ta = xd->above_txfm_context;
  ctx->p_tl = xd->left_txfm_context;
}

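// Encodes one partition leaf: re-applies the chosen mode from ctx, encodes
// the block, and (when not a dry run) updates stats and the delta-lf
// bookkeeping.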
encode_b(const AV1_COMP * const cpi,TileDataEnc * tile_data,ThreadData * td,TOKENEXTRA ** tp,int mi_row,int mi_col,RUN_TYPE dry_run,BLOCK_SIZE bsize,PARTITION_TYPE partition,const PICK_MODE_CONTEXT * const ctx,int * rate)1466 static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
1467 ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
1468 RUN_TYPE dry_run, BLOCK_SIZE bsize,
1469 PARTITION_TYPE partition,
1470 const PICK_MODE_CONTEXT *const ctx, int *rate) {
1471 TileInfo *const tile = &tile_data->tile_info;
1472 MACROBLOCK *const x = &td->mb;
1473 MACROBLOCKD *xd = &x->e_mbd;
1474
1475 set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
1476 MB_MODE_INFO *mbmi = xd->mi[0];
1477 mbmi->partition = partition;
1478 update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run);
1479 if (cpi->oxcf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ &&
1480 cpi->oxcf.deltaq_mode == 0) {
1481 x->rdmult = x->cb_rdmult;
1482 }
1483
1484 if (!dry_run) av1_set_coeff_buffer(cpi, x, mi_row, mi_col);
1485
1486 encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
1487 rate);
1488
1489 if (!dry_run) {
1490 x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];
1491 if (bsize == cpi->common.seq_params.sb_size && mbmi->skip == 1 &&
1492 cpi->common.delta_q_info.delta_lf_present_flag) {
1493 const int frame_lf_count = av1_num_planes(&cpi->common) > 1
1494 ? FRAME_LF_COUNT
1495 : FRAME_LF_COUNT - 2;
1496 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
1497 mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
1498 mbmi->delta_lf_from_base = xd->delta_lf_from_base;
1499 }
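// Derive comp_group_idx for entropy coding: group 0 holds the
// (distance-weighted) averaging compounds, group 1 the masked compounds
// (wedge / difference-weighted).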
1500 if (has_second_ref(mbmi)) {
1501 if (mbmi->compound_idx == 0 ||
1502 mbmi->interinter_comp.type == COMPOUND_AVERAGE)
1503 mbmi->comp_group_idx = 0;
1504 else
1505 mbmi->comp_group_idx = 1;
1506 }
1507 update_stats(&cpi->common, tile_data, td, mi_row, mi_col);
1508 }
1509 }
1510
1511 static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
1512 TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
1513 int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
1514 PC_TREE *pc_tree, int *rate) {
1515 const AV1_COMMON *const cm = &cpi->common;
1516 MACROBLOCK *const x = &td->mb;
1517 MACROBLOCKD *const xd = &x->e_mbd;
1518 const int hbs = mi_size_wide[bsize] / 2;
1519 const int is_partition_root = bsize >= BLOCK_8X8;
1520 const int ctx = is_partition_root
1521 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1522 : -1;
1523 const PARTITION_TYPE partition = pc_tree->partitioning;
1524 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1525 int quarter_step = mi_size_wide[bsize] / 4;
1526 int i;
1527 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
1528
1529 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1530
1531 if (!dry_run && ctx >= 0) {
1532 const int has_rows = (mi_row + hbs) < cm->mi_rows;
1533 const int has_cols = (mi_col + hbs) < cm->mi_cols;
1534
1535 if (has_rows && has_cols) {
1536 #if CONFIG_ENTROPY_STATS
1537 td->counts->partition[ctx][partition]++;
1538 #endif
1539
1540 if (tile_data->allow_update_cdf) {
1541 FRAME_CONTEXT *fc = xd->tile_ctx;
1542 update_cdf(fc->partition_cdf[ctx], partition,
1543 partition_cdf_length(bsize));
1544 }
1545 }
1546 }
1547
1548 switch (partition) {
1549 case PARTITION_NONE:
1550 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1551 partition, &pc_tree->none, rate);
1552 break;
1553 case PARTITION_VERT:
1554 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1555 partition, &pc_tree->vertical[0], rate);
1556 if (mi_col + hbs < cm->mi_cols) {
1557 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1558 partition, &pc_tree->vertical[1], rate);
1559 }
1560 break;
1561 case PARTITION_HORZ:
1562 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1563 partition, &pc_tree->horizontal[0], rate);
1564 if (mi_row + hbs < cm->mi_rows) {
1565 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1566 partition, &pc_tree->horizontal[1], rate);
1567 }
1568 break;
1569 case PARTITION_SPLIT:
1570 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
1571 pc_tree->split[0], rate);
1572 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize,
1573 pc_tree->split[1], rate);
1574 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize,
1575 pc_tree->split[2], rate);
1576 encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run,
1577 subsize, pc_tree->split[3], rate);
1578 break;
1579
1580 case PARTITION_HORZ_A:
1581 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1582 partition, &pc_tree->horizontala[0], rate);
1583 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1584 partition, &pc_tree->horizontala[1], rate);
1585 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize,
1586 partition, &pc_tree->horizontala[2], rate);
1587 break;
1588 case PARTITION_HORZ_B:
1589 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1590 partition, &pc_tree->horizontalb[0], rate);
1591 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1592 partition, &pc_tree->horizontalb[1], rate);
1593 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1594 bsize2, partition, &pc_tree->horizontalb[2], rate);
1595 break;
1596 case PARTITION_VERT_A:
1597 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
1598 partition, &pc_tree->verticala[0], rate);
1599 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2,
1600 partition, &pc_tree->verticala[1], rate);
1601 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize,
1602 partition, &pc_tree->verticala[2], rate);
1603
1604 break;
1605 case PARTITION_VERT_B:
1606 encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
1607 partition, &pc_tree->verticalb[0], rate);
1608 encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2,
1609 partition, &pc_tree->verticalb[1], rate);
1610 encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run,
1611 bsize2, partition, &pc_tree->verticalb[2], rate);
1612 break;
1613 case PARTITION_HORZ_4:
1614 for (i = 0; i < 4; ++i) {
1615 int this_mi_row = mi_row + i * quarter_step;
1616 if (i > 0 && this_mi_row >= cm->mi_rows) break;
1617
1618 encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize,
1619 partition, &pc_tree->horizontal4[i], rate);
1620 }
1621 break;
1622 case PARTITION_VERT_4:
1623 for (i = 0; i < 4; ++i) {
1624 int this_mi_col = mi_col + i * quarter_step;
1625 if (i > 0 && this_mi_col >= cm->mi_cols) break;
1626
1627 encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize,
1628 partition, &pc_tree->vertical4[i], rate);
1629 }
1630 break;
1631 default: assert(0 && "Invalid partition type."); break;
1632 }
1633
1634 update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
1635 }
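// Illustrative walk (a sketch, not encoder code): for PARTITION_SPLIT the
// recursion above visits the four quadrants in raster order. With 4x4 mi
// units, a BLOCK_64X64 node has mi_size_wide[bsize] == 16, so hbs == 8 and
// the sub-block origins are:
//   (mi_row,     mi_col)      (mi_row,     mi_col + 8)
//   (mi_row + 8, mi_col)      (mi_row + 8, mi_col + 8)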
1636
1637 static void set_partial_sb_partition(const AV1_COMMON *const cm,
1638 MB_MODE_INFO *mi, int bh_in, int bw_in,
1639 int mi_rows_remaining,
1640 int mi_cols_remaining, BLOCK_SIZE bsize,
1641 MB_MODE_INFO **mib) {
1642 int bh = bh_in;
1643 int r, c;
1644 for (r = 0; r < cm->seq_params.mib_size; r += bh) {
1645 int bw = bw_in;
1646 for (c = 0; c < cm->seq_params.mib_size; c += bw) {
1647 const int index = r * cm->mi_stride + c;
1648 mib[index] = mi + index;
1649 mib[index]->sb_type = find_partition_size(
1650 bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw);
1651 }
1652 }
1653 }
1654
1655 // This function attempts to set all mode info entries in a given superblock
1656 // to the same block partition size.
1657 // However, at the bottom and right borders of the image the requested size
1658 // may not be allowed, in which case this code chooses the largest allowable
1659 // partition.
1660 static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
1661 MB_MODE_INFO **mib, int mi_row, int mi_col,
1662 BLOCK_SIZE bsize) {
1663 AV1_COMMON *const cm = &cpi->common;
1664 const int mi_rows_remaining = tile->mi_row_end - mi_row;
1665 const int mi_cols_remaining = tile->mi_col_end - mi_col;
1666 int block_row, block_col;
1667 MB_MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col;
1668 int bh = mi_size_high[bsize];
1669 int bw = mi_size_wide[bsize];
1670
1671 assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
1672
1673 // Apply the requested partition size to the SB if it is all "in image".
1674 if ((mi_cols_remaining >= cm->seq_params.mib_size) &&
1675 (mi_rows_remaining >= cm->seq_params.mib_size)) {
1676 for (block_row = 0; block_row < cm->seq_params.mib_size; block_row += bh) {
1677 for (block_col = 0; block_col < cm->seq_params.mib_size;
1678 block_col += bw) {
1679 int index = block_row * cm->mi_stride + block_col;
1680 mib[index] = mi_upper_left + index;
1681 mib[index]->sb_type = bsize;
1682 }
1683 }
1684 } else {
1685 // Else this is a partial SB.
1686 set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
1687 mi_cols_remaining, bsize, mib);
1688 }
1689 }
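// Worked example (sizes assumed for illustration): with a 128x128
// superblock and a requested bsize of BLOCK_64X64, a frame edge leaving
// only mi_rows_remaining == 24 (96 luma rows) cannot fit the bottom row of
// 64x64 blocks, so set_partial_sb_partition() asks find_partition_size()
// for the largest size that fits the remaining 8-mi-high strip instead.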
1690
1691 static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
1692 TileDataEnc *tile_data, MB_MODE_INFO **mib,
1693 TOKENEXTRA **tp, int mi_row, int mi_col,
1694 BLOCK_SIZE bsize, int *rate, int64_t *dist,
1695 int do_recon, PC_TREE *pc_tree) {
1696 AV1_COMMON *const cm = &cpi->common;
1697 const int num_planes = av1_num_planes(cm);
1698 TileInfo *const tile_info = &tile_data->tile_info;
1699 MACROBLOCK *const x = &td->mb;
1700 MACROBLOCKD *const xd = &x->e_mbd;
1701 const int bs = mi_size_wide[bsize];
1702 const int hbs = bs / 2;
1703 int i;
1704 const int pl = (bsize >= BLOCK_8X8)
1705 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1706 : 0;
1707 const PARTITION_TYPE partition =
1708 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1709 : PARTITION_NONE;
1710 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1711 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1712 RD_STATS last_part_rdc, none_rdc, chosen_rdc;
1713 BLOCK_SIZE sub_subsize = BLOCK_4X4;
1714 int splits_below = 0;
1715 BLOCK_SIZE bs_type = mib[0]->sb_type;
1716 int do_partition_search = 1;
1717 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1718
1719 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1720
1721 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1722
1723 av1_invalid_rd_stats(&last_part_rdc);
1724 av1_invalid_rd_stats(&none_rdc);
1725 av1_invalid_rd_stats(&chosen_rdc);
1726
1727 pc_tree->partitioning = partition;
1728
1729 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
1730 xd->left_txfm_context =
1731 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1732 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1733
1734 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1735 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1736 x->mb_energy = av1_log_block_var(cpi, x, bsize);
1737 }
1738
1739 if (do_partition_search &&
1740 cpi->sf.partition_search_type == SEARCH_PARTITION &&
1741 cpi->sf.adjust_partitioning_from_last_frame) {
1742 // Check if any of the sub blocks are further split.
1743 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
1744 sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
1745 splits_below = 1;
1746 for (i = 0; i < 4; i++) {
1747 int jj = i >> 1, ii = i & 0x01;
1748 MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
1749 if (this_mi && this_mi->sb_type >= sub_subsize) {
1750 splits_below = 0;
1751 }
1752 }
1753 }
1754
1755 // If the partition is not PARTITION_NONE, also try PARTITION_NONE unless
1756 // each of the 4 split sub-blocks is split even further.
1757 if (partition != PARTITION_NONE && !splits_below &&
1758 mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
1759 pc_tree->partitioning = PARTITION_NONE;
1760 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
1761 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);
1762
1763 if (none_rdc.rate < INT_MAX) {
1764 none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
1765 none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
1766 }
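// Note on the cost model above: RDCOST() evaluates the usual Lagrangian
// J = lambda * R + D in fixed point, with x->rdmult standing in for lambda
// (up to fixed-point scaling of both terms). Folding the partition
// signaling cost into the rate first keeps this candidate comparable with
// the other partition hypotheses scored the same way.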
1767
1768 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1769 mib[0]->sb_type = bs_type;
1770 pc_tree->partitioning = partition;
1771 }
1772 }
1773
1774 switch (partition) {
1775 case PARTITION_NONE:
1776 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1777 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);
1778 break;
1779 case PARTITION_HORZ:
1780 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1781 PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
1782 0);
1783 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1784 mi_row + hbs < cm->mi_rows) {
1785 RD_STATS tmp_rdc;
1786 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
1787 av1_init_rd_stats(&tmp_rdc);
1788 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
1789 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
1790 mi_col, subsize, NULL);
1791 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
1792 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
1793 INT64_MAX, 0);
1794 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1795 av1_invalid_rd_stats(&last_part_rdc);
1796 break;
1797 }
1798 last_part_rdc.rate += tmp_rdc.rate;
1799 last_part_rdc.dist += tmp_rdc.dist;
1800 last_part_rdc.rdcost += tmp_rdc.rdcost;
1801 }
1802 break;
1803 case PARTITION_VERT:
1804 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
1805 PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
1806 0);
1807 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
1808 mi_col + hbs < cm->mi_cols) {
1809 RD_STATS tmp_rdc;
1810 const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
1811 av1_init_rd_stats(&tmp_rdc);
1812 update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
1813 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
1814 mi_col, subsize, NULL);
1815 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
1816 PARTITION_VERT, subsize,
1817 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 0);
1818 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1819 av1_invalid_rd_stats(&last_part_rdc);
1820 break;
1821 }
1822 last_part_rdc.rate += tmp_rdc.rate;
1823 last_part_rdc.dist += tmp_rdc.dist;
1824 last_part_rdc.rdcost += tmp_rdc.rdcost;
1825 }
1826 break;
1827 case PARTITION_SPLIT:
1828 last_part_rdc.rate = 0;
1829 last_part_rdc.dist = 0;
1830 last_part_rdc.rdcost = 0;
1831 for (i = 0; i < 4; i++) {
1832 int x_idx = (i & 1) * hbs;
1833 int y_idx = (i >> 1) * hbs;
1834 int jj = i >> 1, ii = i & 0x01;
1835 RD_STATS tmp_rdc;
1836 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1837 continue;
1838
1839 av1_init_rd_stats(&tmp_rdc);
1840 rd_use_partition(cpi, td, tile_data,
1841 mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
1842 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
1843 &tmp_rdc.dist, i != 3, pc_tree->split[i]);
1844 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1845 av1_invalid_rd_stats(&last_part_rdc);
1846 break;
1847 }
1848 last_part_rdc.rate += tmp_rdc.rate;
1849 last_part_rdc.dist += tmp_rdc.dist;
1850 }
1851 break;
1852 case PARTITION_VERT_A:
1853 case PARTITION_VERT_B:
1854 case PARTITION_HORZ_A:
1855 case PARTITION_HORZ_B:
1856 case PARTITION_HORZ_4:
1857 case PARTITION_VERT_4:
1858 assert(0 && "Cannot handle extended partition types");
1859 default: assert(0); break;
1860 }
1861
1862 if (last_part_rdc.rate < INT_MAX) {
1863 last_part_rdc.rate += x->partition_cost[pl][partition];
1864 last_part_rdc.rdcost =
1865 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
1866 }
1867
1868 if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
1869 cpi->sf.partition_search_type == SEARCH_PARTITION &&
1870 partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
1871 (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) &&
1872 (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) {
1873 BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
1874 chosen_rdc.rate = 0;
1875 chosen_rdc.dist = 0;
1876
1877 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1878 pc_tree->partitioning = PARTITION_SPLIT;
1879
1880 // Split partition.
1881 for (i = 0; i < 4; i++) {
1882 int x_idx = (i & 1) * hbs;
1883 int y_idx = (i >> 1) * hbs;
1884 RD_STATS tmp_rdc;
1885
1886 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1887 continue;
1888
1889 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1890 pc_tree->split[i]->partitioning = PARTITION_NONE;
1891 pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
1892 PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
1893 INT64_MAX, 0);
1894
1895 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1896 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
1897 av1_invalid_rd_stats(&chosen_rdc);
1898 break;
1899 }
1900
1901 chosen_rdc.rate += tmp_rdc.rate;
1902 chosen_rdc.dist += tmp_rdc.dist;
1903
1904 if (i != 3)
1905 encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
1906 OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL);
1907
1908 chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
1909 }
1910 if (chosen_rdc.rate < INT_MAX) {
1911 chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
1912 chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
1913 }
1914 }
1915
1916 // If last_part is better set the partitioning to that.
1917 if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
1918 mib[0]->sb_type = bsize;
1919 if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
1920 chosen_rdc = last_part_rdc;
1921 }
1922 // If none was better set the partitioning to that.
1923 if (none_rdc.rdcost < chosen_rdc.rdcost) {
1924 if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
1925 chosen_rdc = none_rdc;
1926 }
1927
1928 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1929
1930 // We must have chosen a partitioning and encoding or we'll fail later on.
1931 // No other opportunities for success.
1932 if (bsize == cm->seq_params.sb_size)
1933 assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
1934
1935 if (do_recon) {
1936 if (bsize == cm->seq_params.sb_size) {
1937 // NOTE: To get estimate for rate due to the tokens, use:
1938 // int rate_coeffs = 0;
1939 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
1940 // bsize, pc_tree, &rate_coeffs);
1941 x->cb_offset = 0;
1942 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
1943 pc_tree, NULL);
1944 } else {
1945 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
1946 pc_tree, NULL);
1947 }
1948 }
1949
1950 *rate = chosen_rdc.rate;
1951 *dist = chosen_rdc.dist;
1952 }
1953
1954 // TODO(kyslov): This is now very similar to rd_use_partition (except that
1955 // it doesn't do the extra search around the suggested partitioning);
1956 // consider passing a flag to select the non-rd path (similar to
1957 // encode_sb_row).
1958 static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
1959 TileDataEnc *tile_data, MB_MODE_INFO **mib,
1960 TOKENEXTRA **tp, int mi_row, int mi_col,
1961 BLOCK_SIZE bsize, int *rate, int64_t *dist,
1962 int do_recon, PC_TREE *pc_tree) {
1963 AV1_COMMON *const cm = &cpi->common;
1964 const int num_planes = av1_num_planes(cm);
1965 TileInfo *const tile_info = &tile_data->tile_info;
1966 MACROBLOCK *const x = &td->mb;
1967 MACROBLOCKD *const xd = &x->e_mbd;
1968 const int bs = mi_size_wide[bsize];
1969 const int hbs = bs / 2;
1970 int i;
1971 const int pl = (bsize >= BLOCK_8X8)
1972 ? partition_plane_context(xd, mi_row, mi_col, bsize)
1973 : 0;
1974 const PARTITION_TYPE partition =
1975 (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
1976 : PARTITION_NONE;
1977 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
1978 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
1979 RD_STATS last_part_rdc;
1980 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
1981
1982 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1983
1984 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1985
1986 av1_invalid_rd_stats(&last_part_rdc);
1987
1988 pc_tree->partitioning = partition;
1989
1990 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
1991 xd->left_txfm_context =
1992 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
1993 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
1994
1995 if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
1996 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1997 x->mb_energy = av1_log_block_var(cpi, x, bsize);
1998 }
1999
2000 switch (partition) {
2001 case PARTITION_NONE:
2002 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2003 PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1);
2004 break;
2005 case PARTITION_HORZ:
2006 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2007 PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
2008 1);
2009 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2010 mi_row + hbs < cm->mi_rows) {
2011 RD_STATS tmp_rdc;
2012 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
2013 av1_init_rd_stats(&tmp_rdc);
2014 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
2015 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
2016 mi_col, subsize, NULL);
2017 pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
2018 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
2019 INT64_MAX, 1);
2020 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2021 av1_invalid_rd_stats(&last_part_rdc);
2022 break;
2023 }
2024 last_part_rdc.rate += tmp_rdc.rate;
2025 last_part_rdc.dist += tmp_rdc.dist;
2026 last_part_rdc.rdcost += tmp_rdc.rdcost;
2027 }
2028 break;
2029 case PARTITION_VERT:
2030 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2031 PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
2032 1);
2033 if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2034 mi_col + hbs < cm->mi_cols) {
2035 RD_STATS tmp_rdc;
2036 const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
2037 av1_init_rd_stats(&tmp_rdc);
2038 update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
2039 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
2040 mi_col, subsize, NULL);
2041 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
2042 PARTITION_VERT, subsize,
2043 &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX, 1);
2044 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2045 av1_invalid_rd_stats(&last_part_rdc);
2046 break;
2047 }
2048 last_part_rdc.rate += tmp_rdc.rate;
2049 last_part_rdc.dist += tmp_rdc.dist;
2050 last_part_rdc.rdcost += tmp_rdc.rdcost;
2051 }
2052 break;
2053 case PARTITION_SPLIT:
2054 last_part_rdc.rate = 0;
2055 last_part_rdc.dist = 0;
2056 last_part_rdc.rdcost = 0;
2057 for (i = 0; i < 4; i++) {
2058 int x_idx = (i & 1) * hbs;
2059 int y_idx = (i >> 1) * hbs;
2060 int jj = i >> 1, ii = i & 0x01;
2061 RD_STATS tmp_rdc;
2062 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
2063 continue;
2064
2065 av1_init_rd_stats(&tmp_rdc);
2066 nonrd_use_partition(
2067 cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
2068 mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
2069 &tmp_rdc.dist, i != 3, pc_tree->split[i]);
2070 if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2071 av1_invalid_rd_stats(&last_part_rdc);
2072 break;
2073 }
2074 last_part_rdc.rate += tmp_rdc.rate;
2075 last_part_rdc.dist += tmp_rdc.dist;
2076 }
2077 break;
2078 case PARTITION_VERT_A:
2079 case PARTITION_VERT_B:
2080 case PARTITION_HORZ_A:
2081 case PARTITION_HORZ_B:
2082 case PARTITION_HORZ_4:
2083 case PARTITION_VERT_4:
2084 assert(0 && "Cannot handle extended partition types");
2085 default: assert(0); break;
2086 }
2087
2088 if (last_part_rdc.rate < INT_MAX) {
2089 last_part_rdc.rate += x->partition_cost[pl][partition];
2090 last_part_rdc.rdcost =
2091 RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
2092 }
2093
2094 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2095
2096 // We must have chosen a partitioning and encoding or we'll fail later on.
2097 // No other opportunities for success.
2098 if (bsize == cm->seq_params.sb_size)
2099 assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX);
2100
2101 if (do_recon) {
2102 if (bsize == cm->seq_params.sb_size) {
2103 // NOTE: To get estimate for rate due to the tokens, use:
2104 // int rate_coeffs = 0;
2105 // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
2106 // bsize, pc_tree, &rate_coeffs);
2107 x->cb_offset = 0;
2108 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
2109 pc_tree, NULL);
2110 } else {
2111 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2112 pc_tree, NULL);
2113 }
2114 }
2115
2116 *rate = last_part_rdc.rate;
2117 *dist = last_part_rdc.dist;
2118 }
2119
2120 // Checks to see if a super block is on a horizontal image edge.
2121 // In most cases this is the "real" edge unless there are formatting
2122 // bars embedded in the stream.
2123 static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
2124 int top_edge = 0;
2125 int bottom_edge = cpi->common.mi_rows;
2126 int is_active_h_edge = 0;
2127
2128 // For two-pass encoding, account for any formatting bars detected.
2129 if (cpi->oxcf.pass == 2) {
2130 const TWO_PASS *const twopass = &cpi->twopass;
2131
2132 // The inactive region is specified in MBs, not mi units.
2133 // The image edge is in the following MB row.
2134 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2135
2136 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2137 bottom_edge = AOMMAX(top_edge, bottom_edge);
2138 }
2139
2140 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2141 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2142 is_active_h_edge = 1;
2143 }
2144 return is_active_h_edge;
2145 }
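// Numeric sketch of the straddle test above: with top_edge == 6 and
// mi_step == 4, superblocks starting at mi_row 3..6 have intervals
// [3,7) .. [6,10) that contain the edge and are reported as active, while
// mi_row == 2 gives [2,6), which does not.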
2146
2147 // Checks to see if a super block is on a vertical image edge.
2148 // In most cases this is the "real" edge unless there are formatting
2149 // bars embedded in the stream.
2150 static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
2151 int left_edge = 0;
2152 int right_edge = cpi->common.mi_cols;
2153 int is_active_v_edge = 0;
2154
2155 // For two-pass encoding, account for any formatting bars detected.
2156 if (cpi->oxcf.pass == 2) {
2157 const TWO_PASS *const twopass = &cpi->twopass;
2158
2159 // The inactive region is specified in MBs, not mi units.
2160 // The image edge is in the following MB column.
2161 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2162
2163 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2164 right_edge = AOMMAX(left_edge, right_edge);
2165 }
2166
2167 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2168 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2169 is_active_v_edge = 1;
2170 }
2171 return is_active_v_edge;
2172 }
2173
2174 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2175 memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2176 }
2177
2178 static INLINE void load_pred_mv(MACROBLOCK *x,
2179 const PICK_MODE_CONTEXT *const ctx) {
2180 memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2181 }
2182
2183 // Try searching for an encoding of the given subblock. Returns zero if the
2184 // rdcost is already too high (to tell the caller not to bother searching
2185 // for encodings of further subblocks).
2186 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2187 TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
2188 int mi_row, int mi_col, BLOCK_SIZE subsize,
2189 RD_STATS *best_rdc, RD_STATS *sum_rdc,
2190 RD_STATS *this_rdc, PARTITION_TYPE partition,
2191 PICK_MODE_CONTEXT *prev_ctx,
2192 PICK_MODE_CONTEXT *this_ctx) {
2193 #define RTS_X_RATE_NOCOEF_ARG
2194 #define RTS_MAX_RDCOST best_rdc->rdcost
2195
2196 MACROBLOCK *const x = &td->mb;
2197
2198 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
2199
2200 const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX
2201 ? INT64_MAX
2202 : (best_rdc->rdcost - sum_rdc->rdcost);
2203
2204 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
2205 RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
2206 rdcost_remaining, 0);
2207
2208 if (this_rdc->rate == INT_MAX) {
2209 sum_rdc->rdcost = INT64_MAX;
2210 } else {
2211 sum_rdc->rate += this_rdc->rate;
2212 sum_rdc->dist += this_rdc->dist;
2213 sum_rdc->rdcost += this_rdc->rdcost;
2214 }
2215
2216 if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0;
2217
2218 if (!is_last) {
2219 update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1);
2220 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
2221 subsize, NULL);
2222 }
2223
2224 return 1;
2225
2226 #undef RTS_X_RATE_NOCOEF_ARG
2227 #undef RTS_MAX_RDCOST
2228 }
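// A minimal sketch of the budget-based pruning used above (illustrative
// names, not encoder code): the RD budget handed to pick_sb_modes() is the
// best cost found so far minus what this partition has already spent, so
// the sub-search can bail out once it provably cannot win:
//
//   int64_t budget = best->rdcost - sum->rdcost;  // remaining allowance
//   if (partial_cost >= budget) return 0;         // caller stops searching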
2229
2230 static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
2231 TileDataEnc *tile_data, TOKENEXTRA **tp,
2232 PC_TREE *pc_tree, RD_STATS *best_rdc,
2233 PICK_MODE_CONTEXT ctxs[3],
2234 PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
2235 BLOCK_SIZE bsize, PARTITION_TYPE partition,
2236 int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
2237 int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
2238 int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
2239 MACROBLOCK *const x = &td->mb;
2240 MACROBLOCKD *const xd = &x->e_mbd;
2241 RD_STATS sum_rdc, this_rdc;
2242 #define RTP_STX_TRY_ARGS
2243 int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2244 av1_init_rd_stats(&sum_rdc);
2245 sum_rdc.rate = x->partition_cost[pl][partition];
2246 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
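// The signaling cost of the 3-way partition is charged up front, so the
// running sum_rdc already reflects it when each rd_try_subblock() call
// below compares the accumulated cost against best_rdc.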
2247 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
2248 best_rdc, &sum_rdc, &this_rdc,
2249 RTP_STX_TRY_ARGS partition, ctx, &ctxs[0]))
2250 return;
2251
2252 if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
2253 best_rdc, &sum_rdc, &this_rdc,
2254 RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1]))
2255 return;
2256
2257 // With the new layout of mixed partitions for PARTITION_HORZ_B and
2258 // PARTITION_VERT_B, the last subblock might start past halfway through the
2259 // main block, so we might signal it even though the subblock lies strictly
2260 // outside the image. In that case, we won't spend any bits coding it and the
2261 // difference (obviously) doesn't contribute to the error.
2262 const int try_block2 = 1;
2263 if (try_block2 &&
2264 !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
2265 best_rdc, &sum_rdc, &this_rdc,
2266 RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2]))
2267 return;
2268
2269 if (sum_rdc.rdcost >= best_rdc->rdcost) return;
2270
2271 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2272
2273 if (sum_rdc.rdcost >= best_rdc->rdcost) return;
2274
2275 *best_rdc = sum_rdc;
2276 pc_tree->partitioning = partition;
2277
2278 #undef RTP_STX_TRY_ARGS
2279 }
2280
2281 static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
2282 pc_tree->partitioning = PARTITION_NONE;
2283 pc_tree->cb_search_range = SEARCH_FULL_PLANE;
2284 pc_tree->none.skip = 0;
2285
2286 pc_tree->pc_tree_stats.valid = 0;
2287 pc_tree->pc_tree_stats.split = 0;
2288 pc_tree->pc_tree_stats.skip = 0;
2289 pc_tree->pc_tree_stats.rdcost = INT64_MAX;
2290
2291 for (int i = 0; i < 4; i++) {
2292 pc_tree->pc_tree_stats.sub_block_split[i] = 0;
2293 pc_tree->pc_tree_stats.sub_block_skip[i] = 0;
2294 pc_tree->pc_tree_stats.sub_block_rdcost[i] = INT64_MAX;
2295 }
2296
2297 if (bsize >= BLOCK_8X8) {
2298 BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2299 for (int idx = 0; idx < 4; ++idx)
2300 reset_partition(pc_tree->split[idx], subsize);
2301 }
2302 }
2303
2304 static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
2305 TileDataEnc *tile_data, TOKENEXTRA **tp,
2306 int mi_row, int mi_col, BLOCK_SIZE bsize,
2307 RD_STATS *rd_cost, int64_t best_rd,
2308 PC_TREE *pc_tree, int64_t *none_rd) {
2309 const AV1_COMMON *const cm = &cpi->common;
2310 TileInfo *const tile_info = &tile_data->tile_info;
2311 MACROBLOCK *const x = &td->mb;
2312 MACROBLOCKD *const xd = &x->e_mbd;
2313 const int mi_step = mi_size_wide[bsize] / 2;
2314 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
2315 const TOKENEXTRA *const tp_orig = *tp;
2316 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
2317 int tmp_partition_cost[PARTITION_TYPES];
2318 BLOCK_SIZE subsize;
2319 RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc;
2320 const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
2321 int do_square_split = bsize_at_least_8x8;
2322 const int pl = bsize_at_least_8x8
2323 ? partition_plane_context(xd, mi_row, mi_col, bsize)
2324 : 0;
2325 const int *partition_cost =
2326 pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
2327 const int num_planes = av1_num_planes(cm);
2328
2329 int64_t split_rd[4] = { 0, 0, 0, 0 };
2330
2331 // Override skipping rectangular partition operations for edge blocks
2332 const int has_rows = (mi_row + mi_step < cm->mi_rows);
2333 const int has_cols = (mi_col + mi_step < cm->mi_cols);
2334
2335 if (none_rd) *none_rd = 0;
2336
2337 int partition_none_allowed = has_rows && has_cols;
2338
2339 (void)*tp_orig;
2340 (void)split_rd;
2341
2342 if (best_rd < 0) {
2343 pc_tree->none.rdcost = INT64_MAX;
2344 pc_tree->none.skip = 0;
2345 av1_invalid_rd_stats(rd_cost);
2346 return;
2347 }
2348 pc_tree->pc_tree_stats.valid = 1;
2349
2350 // Override partition costs at the edges of the frame in the same
2351 // way as in read_partition (see decodeframe.c)
2352 if (!(has_rows && has_cols)) {
2353 assert(bsize_at_least_8x8 && pl >= 0);
2354 const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
2355 for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
2356 if (has_cols) {
2357 // At the bottom, the two possibilities are HORZ and SPLIT
2358 aom_cdf_prob bot_cdf[2];
2359 partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
2360 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
2361 av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
2362 } else if (has_rows) {
2363 // At the right, the two possibilities are VERT and SPLIT
2364 aom_cdf_prob rhs_cdf[2];
2365 partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
2366 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
2367 av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
2368 } else {
2369 // At the bottom right, we always split
2370 tmp_partition_cost[PARTITION_SPLIT] = 0;
2371 }
2372
2373 partition_cost = tmp_partition_cost;
2374 }
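// Worked example (assuming the same inverse maps as read_partition): for a
// block straddling only the bottom edge, the bitstream can express just a
// HORZ-like or SPLIT-like decision, so the costs of those two symbols come
// from the gathered 2-entry CDF while every other partition type stays at
// INT_MAX.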
2375
2376 #ifndef NDEBUG
2377 // Nothing should rely on the default value of this array (it is just
2378 // leftover from encoding the previous block), so set it to a fixed
2379 // pattern when debugging.
2380 // bit 0, 1, 2 are blk_skip of each plane
2381 // bit 4, 5, 6 are initialization checking of each plane
2382 memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
2383 #endif // NDEBUG
2384
2385 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
2386
2387 av1_init_rd_stats(&this_rdc);
2388 av1_init_rd_stats(&sum_rdc);
2389 av1_invalid_rd_stats(&best_rdc);
2390 best_rdc.rdcost = best_rd;
2391
2392 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2393
2394 if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
2395 x->mb_energy = av1_log_block_var(cpi, x, bsize);
2396
2397 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
2398 xd->left_txfm_context =
2399 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
2400 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2401
2402 #if CONFIG_DIST_8X8
2403 if (x->using_dist_8x8) {
2404 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
2405 do_square_split = 0;
2406 }
2407 #endif
2408
2409 // PARTITION_NONE
2410 if (partition_none_allowed) {
2411 int pt_cost = 0;
2412 if (bsize_at_least_8x8) {
2413 pc_tree->partitioning = PARTITION_NONE;
2414 pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
2415 ? partition_cost[PARTITION_NONE]
2416 : 0;
2417 }
2418 const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
2419 const int64_t best_remain_rdcost =
2420 best_rdc.rdcost == INT64_MAX ? INT64_MAX
2421 : (best_rdc.rdcost - partition_rd_cost);
2422 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
2423 bsize, ctx_none, best_remain_rdcost, 0);
2424
2425 pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
2426 pc_tree->pc_tree_stats.skip = ctx_none->skip;
2427
2428 if (none_rd) *none_rd = this_rdc.rdcost;
2429 if (this_rdc.rate != INT_MAX) {
2430 if (bsize_at_least_8x8) {
2431 this_rdc.rate += pt_cost;
2432 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2433 }
2434
2435 if (this_rdc.rdcost < best_rdc.rdcost) {
2436 // Adjust dist breakout threshold according to the partition size.
2437 const int64_t dist_breakout_thr =
2438 cpi->sf.partition_search_breakout_dist_thr >>
2439 ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
2440 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
2441 const int rate_breakout_thr =
2442 cpi->sf.partition_search_breakout_rate_thr *
2443 num_pels_log2_lookup[bsize];
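// Numeric sketch of the scaling above (assuming MAX_SB_SIZE_LOG2 == 7 and
// 4x4 mi units): for BLOCK_128X128 the shift is 2*(7-2) - (5+5) == 0, so
// the full threshold applies, while for BLOCK_8X8 it is 2*(7-2) - (1+1)
// == 8, shrinking the distortion breakout threshold by a factor of 256.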
2444
2445 best_rdc = this_rdc;
2446 if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
2447
2448 pc_tree->cb_search_range = SEARCH_FULL_PLANE;
2449
2450 if (!x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
2451 const int use_ml_based_breakout =
2452 bsize <= cpi->sf.use_square_partition_only_threshold &&
2453 bsize > BLOCK_4X4 && xd->bd == 8;
2454
2455 // TODO(anyone): Currently this is using the same model and threshold
2456 // values as in rd_pick_partition. Retraining the model and tuning the
2457 // threshold values might be helpful to improve the speed.
2458 if (use_ml_based_breakout) {
2459 if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
2460 x->source_variance)) {
2461 do_square_split = 0;
2462 }
2463 }
2464
2465 // If all y, u, v transform blocks in this partition are skippable,
2466 // and the dist & rate are within the thresholds, the partition search
2467 // is terminated for the current branch of the partition search tree. The
2468 // dist & rate thresholds are set to 0 at speed 0 to disable the early
2469 // termination at that speed.
2470 if (best_rdc.dist < dist_breakout_thr &&
2471 best_rdc.rate < rate_breakout_thr) {
2472 do_square_split = 0;
2473 }
2474 }
2475
2476 if (cpi->sf.firstpass_simple_motion_search_early_term &&
2477 cm->show_frame && bsize <= BLOCK_32X32 && bsize >= BLOCK_8X8 &&
2478 !frame_is_intra_only(cm) && mi_row + mi_step < cm->mi_rows &&
2479 mi_col + mi_step < cm->mi_cols && this_rdc.rdcost < INT64_MAX &&
2480 this_rdc.rdcost >= 0 && this_rdc.rate < INT_MAX &&
2481 this_rdc.rate >= 0 && do_square_split) {
2482 av1_firstpass_simple_motion_search_early_term(
2483 cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
2484 &do_square_split);
2485 }
2486 }
2487 }
2488
2489 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2490 }
2491
2492 // store estimated motion vector
2493 if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
2494
2495 int64_t temp_best_rdcost = best_rdc.rdcost;
2496 pn_rdc = best_rdc;
2497
2498 // PARTITION_SPLIT
2499 if (do_square_split) {
2500 int reached_last_index = 0;
2501 subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2502 int idx;
2503
2504 sum_rdc.rate = partition_cost[PARTITION_SPLIT];
2505 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
2506
2507 for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
2508 const int x_idx = (idx & 1) * mi_step;
2509 const int y_idx = (idx >> 1) * mi_step;
2510
2511 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
2512 continue;
2513
2514 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
2515
2516 pc_tree->split[idx]->index = idx;
2517 int64_t *p_split_rd = &split_rd[idx];
2518 const int64_t best_remain_rdcost =
2519 (temp_best_rdcost == INT64_MAX) ? INT64_MAX
2520 : (temp_best_rdcost - sum_rdc.rdcost);
2521 rd_pick_sqr_partition(
2522 cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
2523 &this_rdc, best_remain_rdcost, pc_tree->split[idx], p_split_rd);
2524
2525 pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
2526 pc_tree->pc_tree_stats.sub_block_skip[idx] =
2527 pc_tree->split[idx]->none.skip;
2528
2529 if (this_rdc.rate == INT_MAX) {
2530 sum_rdc.rdcost = INT64_MAX;
2531 break;
2532 } else {
2533 sum_rdc.rate += this_rdc.rate;
2534 sum_rdc.dist += this_rdc.dist;
2535 sum_rdc.rdcost += this_rdc.rdcost;
2536 }
2537 }
2538 reached_last_index = (idx == 4);
2539
2540 if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
2541 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
2542
2543 if (sum_rdc.rdcost < best_rdc.rdcost) {
2544 best_rdc = sum_rdc;
2545 pc_tree->partitioning = PARTITION_SPLIT;
2546 }
2547 }
2548
2549 int has_split = 0;
2550 if (pc_tree->partitioning == PARTITION_SPLIT) {
2551 for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
2552 if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
2553 ++has_split;
2554 }
2555
2556 if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
2557 pc_tree->cb_search_range = SPLIT_PLANE;
2558 }
2559 }
2560
2561 if (pc_tree->partitioning == PARTITION_NONE) {
2562 pc_tree->cb_search_range = SEARCH_SAME_PLANE;
2563 if (pn_rdc.dist <= sum_rdc.dist)
2564 pc_tree->cb_search_range = NONE_PARTITION_PLANE;
2565 }
2566
2567 if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;
2568
2569 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2570 } // if (do_square_split)
2571
2572 pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT;
2573 if (do_square_split) {
2574 for (int i = 0; i < 4; ++i) {
2575 pc_tree->pc_tree_stats.sub_block_split[i] =
2576 pc_tree->split[i]->partitioning == PARTITION_SPLIT;
2577 }
2578 }
2579
2580 // TODO(jbb): This code was added to avoid a static analysis warning
2581 // about best_rd not being used after this point. It should be refactored
2582 // so that the duplicate checks occur in some sub-function and are thus
2583 // used...
2584 (void)best_rd;
2585 *rd_cost = best_rdc;
2586
2587 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
2588 pc_tree->index != 3) {
2589 if (bsize == cm->seq_params.sb_size) {
2590 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
2591 } else {
2592 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
2593 pc_tree, NULL);
2594 }
2595 }
2596
2597 if (bsize == cm->seq_params.sb_size) {
2598 assert(best_rdc.rate < INT_MAX);
2599 assert(best_rdc.dist < INT64_MAX);
2600 } else {
2601 assert(tp_orig == *tp);
2602 }
2603 }
2604
2605 // split_score indicates the confidence of picking the split partition;
2606 // none_score indicates the confidence of picking the none partition.
2607 #define FEATURE_SIZE 19
2608 static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
2609 BLOCK_SIZE bsize, int *split_score,
2610 int *none_score) {
2611 if (!pc_tree_stats->valid) return 0;
2612 const float *split_weights = NULL;
2613 const float *none_weights = NULL;
2614 switch (bsize) {
2615 case BLOCK_4X4: break;
2616 case BLOCK_8X8:
2617 split_weights = av1_2pass_split_partition_weights_8;
2618 none_weights = av1_2pass_none_partition_weights_8;
2619 break;
2620 case BLOCK_16X16:
2621 split_weights = av1_2pass_split_partition_weights_16;
2622 none_weights = av1_2pass_none_partition_weights_16;
2623 break;
2624 case BLOCK_32X32:
2625 split_weights = av1_2pass_split_partition_weights_32;
2626 none_weights = av1_2pass_none_partition_weights_32;
2627 break;
2628 case BLOCK_64X64:
2629 split_weights = av1_2pass_split_partition_weights_64;
2630 none_weights = av1_2pass_none_partition_weights_64;
2631 break;
2632 case BLOCK_128X128:
2633 split_weights = av1_2pass_split_partition_weights_128;
2634 none_weights = av1_2pass_none_partition_weights_128;
2635 break;
2636 default: assert(0 && "Unexpected bsize.");
2637 }
2638 if (!split_weights || !none_weights) return 0;
2639
2640 aom_clear_system_state();
2641
2642 float features[FEATURE_SIZE];
2643 int feature_index = 0;
2644 features[feature_index++] = (float)pc_tree_stats->split;
2645 features[feature_index++] = (float)pc_tree_stats->skip;
2646 const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost);
2647 const int rd_valid = rdcost > 0 && rdcost < 1000000000;
2648 features[feature_index++] = (float)rd_valid;
2649 for (int i = 0; i < 4; ++i) {
2650 features[feature_index++] = (float)pc_tree_stats->sub_block_split[i];
2651 features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i];
2652 const int sub_rdcost =
2653 (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]);
2654 const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000;
2655 features[feature_index++] = (float)sub_rd_valid;
2656 // Ratio between the sub-block RD and the whole-block RD.
2657 float rd_ratio = 1.0f;
2658 if (rd_valid && sub_rd_valid && sub_rdcost < rdcost)
2659 rd_ratio = (float)sub_rdcost / (float)rdcost;
2660 features[feature_index++] = rd_ratio;
2661 }
2662 assert(feature_index == FEATURE_SIZE);
2663
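// The model is a single linear layer: the first FEATURE_SIZE entries of
// each weight array multiply the features and the entry at index
// FEATURE_SIZE is the bias, so the loop below computes
// score = bias + sum_i w[i] * features[i] for both classifiers.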
2664 float score_1 = split_weights[FEATURE_SIZE];
2665 float score_2 = none_weights[FEATURE_SIZE];
2666 for (int i = 0; i < FEATURE_SIZE; ++i) {
2667 score_1 += features[i] * split_weights[i];
2668 score_2 += features[i] * none_weights[i];
2669 }
2670 *split_score = (int)(score_1 * 100);
2671 *none_score = (int)(score_2 * 100);
2672 return 1;
2673 }
2674 #undef FEATURE_SIZE
2675
2676 static void ml_prune_rect_partition(const AV1_COMP *const cpi,
2677 const MACROBLOCK *const x, BLOCK_SIZE bsize,
2678 int64_t best_rd, int64_t none_rd,
2679 int64_t *split_rd,
2680 int *const dst_prune_horz,
2681 int *const dst_prune_vert) {
2682 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
2683 best_rd = AOMMAX(best_rd, 1);
2684 const NN_CONFIG *nn_config = NULL;
2685 const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f };
2686 float cur_thresh = 0.0f;
2687 switch (bsize) {
2688 case BLOCK_8X8:
2689 nn_config = &av1_rect_partition_nnconfig_8;
2690 cur_thresh = prob_thresholds[0];
2691 break;
2692 case BLOCK_16X16:
2693 nn_config = &av1_rect_partition_nnconfig_16;
2694 cur_thresh = prob_thresholds[1];
2695 break;
2696 case BLOCK_32X32:
2697 nn_config = &av1_rect_partition_nnconfig_32;
2698 cur_thresh = prob_thresholds[2];
2699 break;
2700 case BLOCK_64X64:
2701 nn_config = &av1_rect_partition_nnconfig_64;
2702 cur_thresh = prob_thresholds[3];
2703 break;
2704 case BLOCK_128X128:
2705 nn_config = &av1_rect_partition_nnconfig_128;
2706 cur_thresh = prob_thresholds[4];
2707 break;
2708 default: assert(0 && "Unexpected bsize.");
2709 }
2710 if (!nn_config) return;
2711 aom_clear_system_state();
2712
2713 // 1. Compute input features
2714 float features[9];
2715
2716 // RD cost ratios
2717 for (int i = 0; i < 5; i++) features[i] = 1.0f;
2718 if (none_rd > 0 && none_rd < 1000000000)
2719 features[0] = (float)none_rd / (float)best_rd;
2720 for (int i = 0; i < 4; i++) {
2721 if (split_rd[i] > 0 && split_rd[i] < 1000000000)
2722 features[1 + i] = (float)split_rd[i] / (float)best_rd;
2723 }
2724
2725 // Variance ratios
2726 const MACROBLOCKD *const xd = &x->e_mbd;
2727 int whole_block_variance;
2728 if (is_cur_buf_hbd(xd)) {
2729 whole_block_variance = av1_high_get_sby_perpixel_variance(
2730 cpi, &x->plane[0].src, bsize, xd->bd);
2731 } else {
2732 whole_block_variance =
2733 av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
2734 }
2735 whole_block_variance = AOMMAX(whole_block_variance, 1);
2736
2737 int split_variance[4];
2738 const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
2739 struct buf_2d buf;
2740 buf.stride = x->plane[0].src.stride;
2741 const int bw = block_size_wide[bsize];
2742 for (int i = 0; i < 4; ++i) {
2743 const int x_idx = (i & 1) * bw / 2;
2744 const int y_idx = (i >> 1) * bw / 2;
2745 buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
2746 if (is_cur_buf_hbd(xd)) {
2747 split_variance[i] =
2748 av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
2749 } else {
2750 split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize);
2751 }
2752 }
2753
2754 for (int i = 0; i < 4; i++)
2755 features[5 + i] = (float)split_variance[i] / (float)whole_block_variance;
2756
2757 // 2. Do the prediction and prune 0-2 partitions based on their probabilities
2758 float raw_scores[3] = { 0.0f };
2759 av1_nn_predict(features, nn_config, raw_scores);
2760 aom_clear_system_state();
2761 float probs[3] = { 0.0f };
2762 av1_nn_softmax(raw_scores, probs, 3);
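// Numeric sketch of the pruning rule below: av1_nn_softmax() maps the raw
// scores to probabilities summing to 1, e.g. raw {2.0, -1.0, 0.5} becomes
// roughly {0.79, 0.04, 0.18}. A class probability at or below cur_thresh
// switches the corresponding prune flag on; here 0.04 > 0.01 would leave
// the horizontal partition unpruned.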
2763
2764 // probs[0] is the probability that both rectangular partitions are worse
2765 // than the current best_rd.
2766 if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1;
2767 if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1;
2768 }
2769
2770 // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
2771 // considered.
2772 static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
2773 int64_t best_rd, int64_t horz_rd[2],
2774 int64_t vert_rd[2], int64_t split_rd[4],
2775 int *const horza_partition_allowed,
2776 int *const horzb_partition_allowed,
2777 int *const verta_partition_allowed,
2778 int *const vertb_partition_allowed) {
2779 if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
2780 const NN_CONFIG *nn_config = NULL;
2781 switch (bsize) {
2782 case BLOCK_8X8: nn_config = NULL; break;
2783 case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break;
2784 case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break;
2785 case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break;
2786 case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break;
2787 default: assert(0 && "Unexpected bsize.");
2788 }
2789 if (!nn_config) return;
2790
2791 aom_clear_system_state();
2792
2793 // Generate features.
2794 float features[10];
2795 int feature_index = 0;
2796 features[feature_index++] = (float)part_ctx;
2797 features[feature_index++] = (float)var_ctx;
2798 const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
2799 int sub_block_rdcost[8] = { 0 };
2800 int rd_index = 0;
2801 for (int i = 0; i < 2; ++i) {
2802 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
2803 sub_block_rdcost[rd_index] = (int)horz_rd[i];
2804 ++rd_index;
2805 }
2806 for (int i = 0; i < 2; ++i) {
2807 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
2808 sub_block_rdcost[rd_index] = (int)vert_rd[i];
2809 ++rd_index;
2810 }
2811 for (int i = 0; i < 4; ++i) {
2812 if (split_rd[i] > 0 && split_rd[i] < 1000000000)
2813 sub_block_rdcost[rd_index] = (int)split_rd[i];
2814 ++rd_index;
2815 }
2816 for (int i = 0; i < 8; ++i) {
2817 // Ratio between the sub-block RD and the whole-block RD.
2818 float rd_ratio = 1.0f;
2819 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
2820 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
2821 features[feature_index++] = rd_ratio;
2822 }
2823 assert(feature_index == 10);
2824
2825 // Calculate scores using the NN model.
2826 float score[16] = { 0.0f };
2827 av1_nn_predict(features, nn_config, score);
2828 aom_clear_system_state();
2829 int int_score[16];
2830 int max_score = -1000;
2831 for (int i = 0; i < 16; ++i) {
2832 int_score[i] = (int)(100 * score[i]);
2833 max_score = AOMMAX(int_score[i], max_score);
2834 }
2835
2836 // Make decisions based on the model scores.
2837 int thresh = max_score;
2838 switch (bsize) {
2839 case BLOCK_16X16: thresh -= 150; break;
2840 case BLOCK_32X32: thresh -= 100; break;
2841 default: break;
2842 }
2843 *horza_partition_allowed = 0;
2844 *horzb_partition_allowed = 0;
2845 *verta_partition_allowed = 0;
2846 *vertb_partition_allowed = 0;
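// Each of the 16 labels is a 4-bit mask over {horz_a, horz_b, vert_a,
// vert_b}: bit 0 re-enables HORZ_A, bit 1 HORZ_B, bit 2 VERT_A and bit 3
// VERT_B. For example, a high score for label 5 (0b0101) re-enables both
// HORZ_A and VERT_A.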
2847 for (int i = 0; i < 16; ++i) {
2848 if (int_score[i] >= thresh) {
2849 if ((i >> 0) & 1) *horza_partition_allowed = 1;
2850 if ((i >> 1) & 1) *horzb_partition_allowed = 1;
2851 if ((i >> 2) & 1) *verta_partition_allowed = 1;
2852 if ((i >> 3) & 1) *vertb_partition_allowed = 1;
2853 }
2854 }
2855 }
2856
2857 #define FEATURES 18
2858 #define LABELS 4
2859 // Use a ML model to predict if horz4 and vert4 should be considered.
2860 static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
2861 BLOCK_SIZE bsize, int part_ctx,
2862 int64_t best_rd, int64_t horz_rd[2],
2863 int64_t vert_rd[2], int64_t split_rd[4],
2864 int *const partition_horz4_allowed,
2865 int *const partition_vert4_allowed,
2866 unsigned int pb_source_variance, int mi_row,
2867 int mi_col) {
2868 if (best_rd >= 1000000000) return;
2869 const NN_CONFIG *nn_config = NULL;
2870 switch (bsize) {
2871 case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break;
2872 case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break;
2873 case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break;
2874 default: assert(0 && "Unexpected bsize.");
2875 }
2876 if (!nn_config) return;
2877
2878 aom_clear_system_state();
2879
2880 // Generate features.
2881 float features[FEATURES];
2882 int feature_index = 0;
2883 features[feature_index++] = (float)part_ctx;
2884 features[feature_index++] = (float)get_unsigned_bits(pb_source_variance);
2885
2886 const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
2887 int sub_block_rdcost[8] = { 0 };
2888 int rd_index = 0;
2889 for (int i = 0; i < 2; ++i) {
2890 if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
2891 sub_block_rdcost[rd_index] = (int)horz_rd[i];
2892 ++rd_index;
2893 }
2894 for (int i = 0; i < 2; ++i) {
2895 if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
2896 sub_block_rdcost[rd_index] = (int)vert_rd[i];
2897 ++rd_index;
2898 }
2899 for (int i = 0; i < 4; ++i) {
2900 if (split_rd[i] > 0 && split_rd[i] < 1000000000)
2901 sub_block_rdcost[rd_index] = (int)split_rd[i];
2902 ++rd_index;
2903 }
2904 for (int i = 0; i < 8; ++i) {
2905 // Ratio between the sub-block RD and the whole-block RD.
2906 float rd_ratio = 1.0f;
2907 if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
2908 rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
2909 features[feature_index++] = rd_ratio;
2910 }
2911
2912 // Get variance of the 1:4 and 4:1 sub-blocks.
2913 unsigned int horz_4_source_var[4] = { 0 };
2914 unsigned int vert_4_source_var[4] = { 0 };
2915 {
2916 BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
2917 BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
2918 av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
2919 av1_num_planes(&cpi->common), bsize);
2920 const int src_stride = x->plane[0].src.stride;
2921 const uint8_t *src = x->plane[0].src.buf;
2922 const MACROBLOCKD *const xd = &x->e_mbd;
2923 for (int i = 0; i < 4; ++i) {
2924 const uint8_t *horz_src =
2925 src + i * block_size_high[horz_4_bs] * src_stride;
2926 const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
2927 unsigned int horz_var, vert_var, sse;
2928 if (is_cur_buf_hbd(xd)) {
2929 switch (xd->bd) {
2930 case 10:
2931 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2932 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2933 0, &sse);
2934 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2935 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2936 0, &sse);
2937 break;
2938 case 12:
2939 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2940 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2941 0, &sse);
2942 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2943 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2944 0, &sse);
2945 break;
2946 case 8:
2947 default:
2948 horz_var = cpi->fn_ptr[horz_4_bs].vf(
2949 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2950 0, &sse);
2951 vert_var = cpi->fn_ptr[vert_4_bs].vf(
2952 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2953 0, &sse);
2954 break;
2955 }
2956 horz_4_source_var[i] =
2957 ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2958 vert_4_source_var[i] =
2959 ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2960 } else {
2961 horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS,
2962 0, &sse);
2963 vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS,
2964 0, &sse);
2965 horz_4_source_var[i] =
2966 ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2967 vert_4_source_var[i] =
2968 ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2969 }
2970 }
2971 }
2972
2973 const float denom = (float)(pb_source_variance + 1);
2974 const float low_b = 0.1f;
2975 const float high_b = 10.0f;
2976 for (int i = 0; i < 4; ++i) {
2977 // Ratio between the 4:1 sub-block variance and the whole-block variance.
2978 float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
2979 if (var_ratio < low_b) var_ratio = low_b;
2980 if (var_ratio > high_b) var_ratio = high_b;
2981 features[feature_index++] = var_ratio;
2982 }
2983 for (int i = 0; i < 4; ++i) {
2984 // Ratio between the 1:4 sub-block variance and the whole-block variance.
2985 float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
2986 if (var_ratio < low_b) var_ratio = low_b;
2987 if (var_ratio > high_b) var_ratio = high_b;
2988 features[feature_index++] = var_ratio;
2989 }
2990 assert(feature_index == FEATURES);
2991
2992 // Calculate scores using the NN model.
2993 float score[LABELS] = { 0.0f };
2994 av1_nn_predict(features, nn_config, score);
2995 aom_clear_system_state();
2996 int int_score[LABELS];
2997 int max_score = -1000;
2998 for (int i = 0; i < LABELS; ++i) {
2999 int_score[i] = (int)(100 * score[i]);
3000 max_score = AOMMAX(int_score[i], max_score);
3001 }
3002
3003 // Make decisions based on the model scores.
3004 int thresh = max_score;
3005 switch (bsize) {
3006 case BLOCK_16X16: thresh -= 500; break;
3007 case BLOCK_32X32: thresh -= 500; break;
3008 case BLOCK_64X64: thresh -= 200; break;
3009 default: break;
3010 }
3011 *partition_horz4_allowed = 0;
3012 *partition_vert4_allowed = 0;
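// Each of the four labels is a 2-bit mask: bit 0 allows HORZ_4 and bit 1
// allows VERT_4. A label scoring within the margin of the best score
// re-enables the corresponding 1:4/4:1 partitions.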
3013 for (int i = 0; i < LABELS; ++i) {
3014 if (int_score[i] >= thresh) {
3015 if ((i >> 0) & 1) *partition_horz4_allowed = 1;
3016 if ((i >> 1) & 1) *partition_vert4_allowed = 1;
3017 }
3018 }
3019 }
3020 #undef FEATURES
3021 #undef LABELS
3022
3023 #define FEATURES 4
3024 // ML-based partition search breakout.
3025 static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
3026 const MACROBLOCK *const x,
3027 const RD_STATS *const rd_stats,
3028 unsigned int pb_source_variance) {
3029 const NN_CONFIG *nn_config = NULL;
3030 int thresh = 0;
3031 switch (bsize) {
3032 case BLOCK_8X8:
3033 nn_config = &av1_partition_breakout_nnconfig_8;
3034 thresh = cpi->sf.ml_partition_search_breakout_thresh[0];
3035 break;
3036 case BLOCK_16X16:
3037 nn_config = &av1_partition_breakout_nnconfig_16;
3038 thresh = cpi->sf.ml_partition_search_breakout_thresh[1];
3039 break;
3040 case BLOCK_32X32:
3041 nn_config = &av1_partition_breakout_nnconfig_32;
3042 thresh = cpi->sf.ml_partition_search_breakout_thresh[2];
3043 break;
3044 case BLOCK_64X64:
3045 nn_config = &av1_partition_breakout_nnconfig_64;
3046 thresh = cpi->sf.ml_partition_search_breakout_thresh[3];
3047 break;
3048 case BLOCK_128X128:
3049 nn_config = &av1_partition_breakout_nnconfig_128;
3050 thresh = cpi->sf.ml_partition_search_breakout_thresh[4];
3051 break;
3052 default: assert(0 && "Unexpected bsize.");
3053 }
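// A negative threshold from the speed features disables the ML breakout for
// this block size.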
3054 if (!nn_config || thresh < 0) return 0;
3055
3056 // Generate feature values.
3057 float features[FEATURES];
3058 int feature_index = 0;
3059 aom_clear_system_state();
3060
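// Normalize the features to (roughly) per-pixel quantities: the rate is
// scaled by rdmult with the fixed-point factors 128 and 512 divided out and
// then by the pixel count, the distortion is shifted down by the pixel
// count, and the DC quantizer contributes its squared step size / 256.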
3061 const int num_pels_log2 = num_pels_log2_lookup[bsize];
3062 float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
3063 rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
3064 rate_f;
3065 features[feature_index++] = rate_f;
3066
3067 const float dist_f =
3068 (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
3069 features[feature_index++] = dist_f;
3070
3071 features[feature_index++] = (float)pb_source_variance;
3072
3073 const int dc_q = (int)x->plane[0].dequant_QTX[0];
3074 features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
3075 assert(feature_index == FEATURES);
3076
3077 // Calculate score using the NN model.
3078 float score = 0.0f;
3079 av1_nn_predict(features, nn_config, &score);
3080 aom_clear_system_state();
3081
3082 // Make decision.
3083 return (int)(score * 100) >= thresh;
3084 }
3085 #undef FEATURES
3086
3087 // Record the ref frames that have been selected by square partition blocks.
3088 static void update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
3089 BLOCK_SIZE bsize, int mib_size,
3090 int mi_row, int mi_col) {
3091 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
3092 const int sb_size_mask = mib_size - 1;
3093 const int mi_row_in_sb = mi_row & sb_size_mask;
3094 const int mi_col_in_sb = mi_col & sb_size_mask;
3095 const int mi_size = mi_size_wide[bsize];
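// x->picked_ref_frames_mask is laid out as a 32x32 grid of mi units covering
// the superblock; set bit 'ref_type' for every mi unit this block covers.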
3096 for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
3097 for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
3098 x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
3099 }
3100 }
3101 }
3102
3103 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
3104 // unlikely to be selected depending on previous rate-distortion optimization
3105 // results, for encoding speed-up.
3106 // TODO(chiyotsai@google.com): Move these ML-related variables to a separate
3107 // file to separate low-level ML logic from partition logic.
3108 #define NUM_SIMPLE_MOTION_FEATURES 28
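// Recursively searches the best partitioning for the block at (mi_row,
// mi_col) of size bsize, constrained to square sizes in [min_sq_part,
// max_sq_part]. The best RD cost found is written to rd_cost, and *none_rd
// (if non-NULL) receives the RD cost of PARTITION_NONE for the caller's
// pruning logic.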
3109 static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
3110 TileDataEnc *tile_data, TOKENEXTRA **tp,
3111 int mi_row, int mi_col, BLOCK_SIZE bsize,
3112 BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
3113 RD_STATS *rd_cost, int64_t best_rd,
3114 PC_TREE *pc_tree, int64_t *none_rd) {
3115 const AV1_COMMON *const cm = &cpi->common;
3116 const int num_planes = av1_num_planes(cm);
3117 TileInfo *const tile_info = &tile_data->tile_info;
3118 MACROBLOCK *const x = &td->mb;
3119 MACROBLOCKD *const xd = &x->e_mbd;
3120 const int mi_step = mi_size_wide[bsize] / 2;
3121 RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
3122 const TOKENEXTRA *const tp_orig = *tp;
3123 PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
3124 int tmp_partition_cost[PARTITION_TYPES];
3125 BLOCK_SIZE subsize;
3126 RD_STATS this_rdc, sum_rdc, best_rdc;
3127 const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
3128 int do_square_split = bsize_at_least_8x8;
3129 const int pl = bsize_at_least_8x8
3130 ? partition_plane_context(xd, mi_row, mi_col, bsize)
3131 : 0;
3132 const int *partition_cost =
3133 pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
3134
3135 int do_rectangular_split = cpi->oxcf.enable_rect_partitions;
3136 int64_t cur_none_rd = 0;
3137 int64_t split_rd[4] = { 0, 0, 0, 0 };
3138 int64_t horz_rd[2] = { 0, 0 };
3139 int64_t vert_rd[2] = { 0, 0 };
3140 int prune_horz = 0;
3141 int prune_vert = 0;
3142 int terminate_partition_search = 0;
3143
3144 int split_ctx_is_ready[2] = { 0, 0 };
3145 int horz_ctx_is_ready = 0;
3146 int vert_ctx_is_ready = 0;
3147 BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
3148
3149 if (best_rd < 0) {
3150 pc_tree->none.rdcost = INT64_MAX;
3151 pc_tree->none.skip = 0;
3152 av1_invalid_rd_stats(rd_cost);
3153 return;
3154 }
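// At the superblock root, clear the retry flag; it is set again near the end
// of this function if no valid partitioning is found, in which case the
// search restarts from BEGIN_PARTITION_SEARCH with relaxed constraints.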
3155 if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
3156
3157 // Override skipping rectangular partition operations for edge blocks
3158 const int has_rows = (mi_row + mi_step < cm->mi_rows);
3159 const int has_cols = (mi_col + mi_step < cm->mi_cols);
3160 const int xss = x->e_mbd.plane[1].subsampling_x;
3161 const int yss = x->e_mbd.plane[1].subsampling_y;
3162
3163 if (none_rd) *none_rd = 0;
3164 int partition_none_allowed = has_rows && has_cols;
3165 int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3166 cpi->oxcf.enable_rect_partitions;
3167 int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3168 cpi->oxcf.enable_rect_partitions;
3169
3170 (void)*tp_orig;
3171
3172 #if CONFIG_COLLECT_PARTITION_STATS
3173 int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
3174 int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
3175 int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
3176 struct aom_usec_timer partition_timer = { 0 };
3177 int partition_timer_on = 0;
3178 #if CONFIG_COLLECT_PARTITION_STATS == 2
3179 PartitionStats *part_stats = &cpi->partition_stats;
3180 #endif
3181 #endif
3182
3183 // Override partition costs at the edges of the frame in the same
3184 // way as in read_partition (see decodeframe.c)
3185 if (!(has_rows && has_cols)) {
3186 assert(bsize_at_least_8x8 && pl >= 0);
3187 const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
3188 for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
3189 if (has_cols) {
3190 // At the bottom, the two possibilities are HORZ and SPLIT
3191 aom_cdf_prob bot_cdf[2];
3192 partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
3193 static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
3194 av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
3195 } else if (has_rows) {
3196 // At the right, the two possibilities are VERT and SPLIT
3197 aom_cdf_prob rhs_cdf[2];
3198 partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
3199 static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
3200 av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
3201 } else {
3202 // At the bottom right, we always split
3203 tmp_partition_cost[PARTITION_SPLIT] = 0;
3204 }
3205
3206 partition_cost = tmp_partition_cost;
3207 do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3208 }
3209
3210 #ifndef NDEBUG
3211 // Nothing should rely on the default value of this array (it is just
3212 // leftover from encoding the previous block). Set it to a fixed pattern
3213 // when debugging.
3214 // bit 0, 1, 2 are blk_skip of each plane
3215 // bit 4, 5, 6 are initialization checking of each plane
3216 memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
3217 #endif // NDEBUG
3218
3219 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
3220
3221 av1_init_rd_stats(&this_rdc);
3222 av1_invalid_rd_stats(&best_rdc);
3223 best_rdc.rdcost = best_rd;
3224
3225 set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3226
3227 if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
3228 x->mb_energy = av1_log_block_var(cpi, x, bsize);
3229
3230 if (bsize > cpi->sf.use_square_partition_only_threshold) {
3231 partition_horz_allowed &= !has_rows;
3232 partition_vert_allowed &= !has_cols;
3233 }
3234
3235 if (bsize > BLOCK_4X4 && x->use_cb_search_range) {
3236 int split_score = 0;
3237 int none_score = 0;
3238 const int score_valid = ml_prune_2pass_split_partition(
3239 &pc_tree->pc_tree_stats, bsize, &split_score, &none_score);
3240 if (score_valid) {
3241 {
3242 const int only_split_thresh = 300;
3243 const int no_none_thresh = 250;
3244 const int no_split_thresh = 0;
3245 if (split_score > only_split_thresh) {
3246 partition_none_allowed = 0;
3247 partition_horz_allowed = 0;
3248 partition_vert_allowed = 0;
3249 } else if (split_score > no_none_thresh) {
3250 partition_none_allowed = 0;
3251 }
3252 if (split_score < no_split_thresh) do_square_split = 0;
3253 }
3254 {
3255 const int no_split_thresh = 120;
3256 const int no_none_thresh = -120;
3257 if (none_score > no_split_thresh && partition_none_allowed)
3258 do_square_split = 0;
3259 if (none_score < no_none_thresh) partition_none_allowed = 0;
3260 }
3261 } else {
3262 if (pc_tree->cb_search_range == SPLIT_PLANE) {
3263 partition_none_allowed = 0;
3264 partition_horz_allowed = 0;
3265 partition_vert_allowed = 0;
3266 }
3267 if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0;
3268 if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) {
3269 do_square_split = 0;
3270 partition_horz_allowed = 0;
3271 partition_vert_allowed = 0;
3272 }
3273 }
3274
3275 // Fall back to default values in case all partition modes are rejected.
3276 if (partition_none_allowed == 0 && do_square_split == 0 &&
3277 partition_horz_allowed == 0 && partition_vert_allowed == 0) {
3278 do_square_split = bsize_at_least_8x8;
3279 partition_none_allowed = has_rows && has_cols;
3280 partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3281 cpi->oxcf.enable_rect_partitions;
3282 partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3283 cpi->oxcf.enable_rect_partitions;
3284 }
3285 }
3286
3287 xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
3288 xd->left_txfm_context =
3289 xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
3290 save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3291
3292 // Use simple_motion_search to prune partitions. This must be done prior to
3293 // PARTITION_SPLIT to propagate the initial MVs to a smaller block size.
3294 const int try_split_only =
3295 cpi->sf.simple_motion_search_split_only && bsize >= BLOCK_8X8 &&
3296 do_square_split && mi_row + mi_size_high[bsize] <= cm->mi_rows &&
3297 mi_col + mi_size_wide[bsize] <= cm->mi_cols && !frame_is_intra_only(cm) &&
3298 !av1_superres_scaled(cm);
3299
3300 if (try_split_only) {
3301 av1_simple_motion_search_based_split(
3302 cpi, x, mi_row, mi_col, bsize, &partition_none_allowed,
3303 &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
3304 &do_square_split);
3305 }
3306
3307 const int try_prune_rect =
3308 cpi->sf.simple_motion_search_prune_rect && !frame_is_intra_only(cm) &&
3309 do_rectangular_split &&
3310 (do_square_split || partition_none_allowed ||
3311 (prune_horz && prune_vert)) &&
3312 (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
3313
3314 float simple_motion_features[NUM_SIMPLE_MOTION_FEATURES] = { 0.0f };
3315 int simple_motion_features_are_valid = 0;
3316
3317 if (try_prune_rect) {
3318 av1_simple_motion_search_prune_part(
3319 cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
3320 &partition_horz_allowed, &partition_vert_allowed, &do_square_split,
3321 &do_rectangular_split, &prune_horz, &prune_vert, simple_motion_features,
3322 &simple_motion_features_are_valid);
3323 }
3324
3325 // Max and min square partition levels are defined as the partition nodes that
3326 // the recursive function rd_pick_partition() can reach. To implement this:
3327 // only PARTITION_NONE is allowed if the current node equals min_sq_part;
3328 // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part.
3329 assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]);
3330 assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]);
3331 assert(min_sq_part <= max_sq_part);
3332 assert(block_size_wide[bsize] == block_size_high[bsize]);
3333 const int max_partition_size = block_size_wide[max_sq_part];
3334 const int min_partition_size = block_size_wide[min_sq_part];
3335 const int blksize = block_size_wide[bsize];
3336 assert(min_partition_size <= max_partition_size);
3337 const int is_le_min_sq_part = blksize <= min_partition_size;
3338 const int is_gt_max_sq_part = blksize > max_partition_size;
3339 if (is_gt_max_sq_part) {
3340 // If current block size is larger than max, only allow split.
3341 partition_none_allowed = 0;
3342 partition_horz_allowed = 0;
3343 partition_vert_allowed = 0;
3344 do_square_split = 1;
3345 } else if (is_le_min_sq_part) {
3346 // If current block size is less or equal to min, only allow none if valid
3347 // block large enough; only allow split otherwise.
3348 partition_horz_allowed = 0;
3349 partition_vert_allowed = 0;
3350 // Only disable square split when the current block is not at the picture
3351 // boundary; otherwise, inherit the square split flag from the logic above.
3352 if (has_rows && has_cols) do_square_split = 0;
3353 partition_none_allowed = !do_square_split;
3354 }
3355 do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3356
3357 BEGIN_PARTITION_SEARCH:
3358 if (x->must_find_valid_partition) {
3359 do_square_split =
3360 bsize_at_least_8x8 && partition_cost[PARTITION_SPLIT] != INT_MAX;
3361 partition_none_allowed = has_rows && has_cols;
3362 partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3363 cpi->oxcf.enable_rect_partitions;
3364 partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3365 cpi->oxcf.enable_rect_partitions;
3366 terminate_partition_search = 0;
3367 }
3368
3369 // Partition block source pixel variance.
3370 unsigned int pb_source_variance = UINT_MAX;
3371
3372 // Partition block SSE after simple motion compensation. Not in use now,
3373 // but will be used by upcoming speed features.
3374 unsigned int pb_simple_motion_pred_sse = UINT_MAX;
3375 (void)pb_simple_motion_pred_sse;
3376
3377 #if CONFIG_DIST_8X8
3378 if (x->using_dist_8x8) {
3379 if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
3380 if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
3381 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
3382 do_square_split = 0;
3383 }
3384 #endif
3385
3386 // PARTITION_NONE
3387 if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
3388 if (!terminate_partition_search && partition_none_allowed &&
3389 !is_gt_max_sq_part) {
3390 int pt_cost = 0;
3391 if (bsize_at_least_8x8) {
3392 pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
3393 ? partition_cost[PARTITION_NONE]
3394 : 0;
3395 }
3396 const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
3397 const int64_t best_remain_rdcost =
3398 (best_rdc.rdcost == INT64_MAX) ? INT64_MAX
3399 : (best_rdc.rdcost - partition_rd_cost);
3400 #if CONFIG_COLLECT_PARTITION_STATS
3401 if (best_remain_rdcost >= 0) {
3402 partition_attempts[PARTITION_NONE] += 1;
3403 aom_usec_timer_start(&partition_timer);
3404 partition_timer_on = 1;
3405 }
3406 #endif
3407 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
3408 bsize, ctx_none, best_remain_rdcost, 0);
3409 #if CONFIG_COLLECT_PARTITION_STATS
3410 if (partition_timer_on) {
3411 aom_usec_timer_mark(&partition_timer);
3412 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3413 partition_times[PARTITION_NONE] += time;
3414 partition_timer_on = 0;
3415 }
3416 #endif
3417 pb_source_variance = x->source_variance;
3418 pb_simple_motion_pred_sse = x->simple_motion_pred_sse;
3419 if (none_rd) *none_rd = this_rdc.rdcost;
3420 cur_none_rd = this_rdc.rdcost;
3421 if (this_rdc.rate != INT_MAX) {
3422 if (cpi->sf.prune_ref_frame_for_rect_partitions) {
3423 const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
3424 update_picked_ref_frames_mask(x, ref_type, bsize,
3425 cm->seq_params.mib_size, mi_row, mi_col);
3426 }
3427 if (bsize_at_least_8x8) {
3428 this_rdc.rate += pt_cost;
3429 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
3430 }
3431
3432 if (this_rdc.rdcost < best_rdc.rdcost) {
3433 // Adjust dist breakout threshold according to the partition size.
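// The threshold is scaled down by the log2 ratio of the maximum superblock
// area to the current block area, so smaller blocks use proportionally
// smaller breakout thresholds.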
3434 const int64_t dist_breakout_thr =
3435 cpi->sf.partition_search_breakout_dist_thr >>
3436 ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
3437 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
3438 const int rate_breakout_thr =
3439 cpi->sf.partition_search_breakout_rate_thr *
3440 num_pels_log2_lookup[bsize];
3441
3442 best_rdc = this_rdc;
3443 if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
3444
3445 if ((do_square_split || do_rectangular_split) &&
3446 !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
3447 const int use_ml_based_breakout =
3448 bsize <= cpi->sf.use_square_partition_only_threshold &&
3449 bsize > BLOCK_4X4 && xd->bd == 8;
3450 if (use_ml_based_breakout) {
3451 if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
3452 pb_source_variance)) {
3453 do_square_split = 0;
3454 do_rectangular_split = 0;
3455 }
3456 }
3457
3458 // If all y, u, v transform blocks in this partition are skippable,
3459 // and the dist & rate are within the thresholds, the partition
3460 // search is terminated for the current branch of the partition
3461 // search tree. The dist & rate thresholds are set to 0 at speed 0
3462 // to disable the early termination at that speed.
3463 if (best_rdc.dist < dist_breakout_thr &&
3464 best_rdc.rate < rate_breakout_thr) {
3465 do_square_split = 0;
3466 do_rectangular_split = 0;
3467 }
3468 }
3469
3470 if (cpi->sf.simple_motion_search_early_term_none && cm->show_frame &&
3471 !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 &&
3472 mi_row + mi_step < cm->mi_rows && mi_col + mi_step < cm->mi_cols &&
3473 this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
3474 this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
3475 (do_square_split || do_rectangular_split)) {
3476 av1_simple_motion_search_early_term_none(
3477 cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
3478 &terminate_partition_search, simple_motion_features,
3479 &simple_motion_features_are_valid);
3480 }
3481 }
3482 }
3483
3484 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3485 }
3486
3487 // Store the estimated motion vector.
3488 if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
3489
3490 // PARTITION_SPLIT
3491 if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
3492 av1_init_rd_stats(&sum_rdc);
3493 subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3494 sum_rdc.rate = partition_cost[PARTITION_SPLIT];
3495 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3496
3497 int idx;
3498 #if CONFIG_COLLECT_PARTITION_STATS
3499 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3500 partition_attempts[PARTITION_SPLIT] += 1;
3501 aom_usec_timer_start(&partition_timer);
3502 partition_timer_on = 1;
3503 }
3504 #endif
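// The four sub-blocks are visited in raster order: idx 0..3 maps to (row,
// col) offsets (0,0), (0,1), (1,0), (1,1) in units of mi_step, and the loop
// stops as soon as the accumulated cost reaches the best so far.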
3505 for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
3506 const int x_idx = (idx & 1) * mi_step;
3507 const int y_idx = (idx >> 1) * mi_step;
3508
3509 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3510 continue;
3511
3512 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3513
3514 pc_tree->split[idx]->index = idx;
3515 int64_t *p_split_rd = &split_rd[idx];
3516 const int64_t best_remain_rdcost =
3517 best_rdc.rdcost == INT64_MAX ? INT64_MAX
3518 : (best_rdc.rdcost - sum_rdc.rdcost);
3519 rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
3520 subsize, max_sq_part, min_sq_part, &this_rdc,
3521 best_remain_rdcost, pc_tree->split[idx], p_split_rd);
3522
3523 if (this_rdc.rate == INT_MAX) {
3524 sum_rdc.rdcost = INT64_MAX;
3525 break;
3526 } else {
3527 sum_rdc.rate += this_rdc.rate;
3528 sum_rdc.dist += this_rdc.dist;
3529 sum_rdc.rdcost += this_rdc.rdcost;
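// Record whether the first two sub-blocks' PARTITION_NONE contexts can be
// reused by the AB partitions searched below; blocks coded with palette
// mode or CfL prediction are excluded.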
3530 if (idx <= 1 && (bsize <= BLOCK_8X8 ||
3531 pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
3532 const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
3533 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3534 // Neither palette mode nor cfl predicted
3535 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3536 if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
3537 }
3538 }
3539 }
3540 }
3541 #if CONFIG_COLLECT_PARTITION_STATS
3542 if (partition_timer_on) {
3543 aom_usec_timer_mark(&partition_timer);
3544 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3545 partition_times[PARTITION_SPLIT] += time;
3546 partition_timer_on = 0;
3547 }
3548 #endif
3549 const int reached_last_index = (idx == 4);
3550
3551 if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
3552 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3553
3554 if (sum_rdc.rdcost < best_rdc.rdcost) {
3555 best_rdc = sum_rdc;
3556 pc_tree->partitioning = PARTITION_SPLIT;
3557 }
3558 } else if (cpi->sf.less_rectangular_check_level > 0) {
3559 // Skip the rectangular partition test when the larger block size
3560 // gives a better RD cost.
3561 if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
3562 do_rectangular_split &= !partition_none_allowed;
3563 }
3564
3565 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3566 } // if (do_split)
3567
3568 if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
3569 (partition_horz_allowed || partition_vert_allowed) &&
3570 !(prune_horz || prune_vert)) {
3571 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3572 ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
3573 split_rd, &prune_horz, &prune_vert);
3574 }
3575
3576 // PARTITION_HORZ
3577 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed));
3578 if (!terminate_partition_search && partition_horz_allowed && !prune_horz &&
3579 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3580 !is_gt_max_sq_part) {
3581 av1_init_rd_stats(&sum_rdc);
3582 subsize = get_partition_subsize(bsize, PARTITION_HORZ);
3583 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3584 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3585 partition_none_allowed) {
3586 pc_tree->horizontal[0].pred_interp_filter =
3587 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3588 }
3589 sum_rdc.rate = partition_cost[PARTITION_HORZ];
3590 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3591 const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
3592 ? INT64_MAX
3593 : (best_rdc.rdcost - sum_rdc.rdcost);
3594 #if CONFIG_COLLECT_PARTITION_STATS
3595 if (best_remain_rdcost >= 0) {
3596 partition_attempts[PARTITION_HORZ] += 1;
3597 aom_usec_timer_start(&partition_timer);
3598 partition_timer_on = 1;
3599 }
3600 #endif
3601 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
3602 subsize, &pc_tree->horizontal[0], best_remain_rdcost, 0);
3603
3604 if (this_rdc.rate == INT_MAX) {
3605 sum_rdc.rdcost = INT64_MAX;
3606 } else {
3607 sum_rdc.rate += this_rdc.rate;
3608 sum_rdc.dist += this_rdc.dist;
3609 sum_rdc.rdcost += this_rdc.rdcost;
3610 }
3611 horz_rd[0] = this_rdc.rdcost;
3612
3613 if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
3614 const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
3615 const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
3616 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3617 // Neither palette mode nor cfl predicted
3618 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3619 if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
3620 }
3621 update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
3622 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
3623 subsize, NULL);
3624
3625 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
3626
3627 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3628 partition_none_allowed) {
3629 pc_tree->horizontal[1].pred_interp_filter =
3630 av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
3631 }
3632 pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
3633 PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
3634 best_rdc.rdcost - sum_rdc.rdcost, 0);
3635 horz_rd[1] = this_rdc.rdcost;
3636
3637 if (this_rdc.rate == INT_MAX) {
3638 sum_rdc.rdcost = INT64_MAX;
3639 } else {
3640 sum_rdc.rate += this_rdc.rate;
3641 sum_rdc.dist += this_rdc.dist;
3642 sum_rdc.rdcost += this_rdc.rdcost;
3643 }
3644 }
3645 #if CONFIG_COLLECT_PARTITION_STATS
3646 if (partition_timer_on) {
3647 aom_usec_timer_mark(&partition_timer);
3648 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3649 partition_times[PARTITION_HORZ] += time;
3650 partition_timer_on = 0;
3651 }
3652 #endif
3653
3654 if (sum_rdc.rdcost < best_rdc.rdcost) {
3655 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3656 if (sum_rdc.rdcost < best_rdc.rdcost) {
3657 best_rdc = sum_rdc;
3658 pc_tree->partitioning = PARTITION_HORZ;
3659 }
3660 }
3661
3662 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3663 }
3664
3665 // PARTITION_VERT
3666 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed));
3667 if (!terminate_partition_search && partition_vert_allowed && !prune_vert &&
3668 (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
3669 !is_gt_max_sq_part) {
3670 av1_init_rd_stats(&sum_rdc);
3671 subsize = get_partition_subsize(bsize, PARTITION_VERT);
3672
3673 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3674
3675 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3676 partition_none_allowed) {
3677 pc_tree->vertical[0].pred_interp_filter =
3678 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3679 }
3680 sum_rdc.rate = partition_cost[PARTITION_VERT];
3681 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3682 const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
3683 ? INT64_MAX
3684 : (best_rdc.rdcost - sum_rdc.rdcost);
3685 #if CONFIG_COLLECT_PARTITION_STATS
3686 if (best_remain_rdcost >= 0) {
3687 partition_attempts[PARTITION_VERT] += 1;
3688 aom_usec_timer_start(&partition_timer);
3689 partition_timer_on = 1;
3690 }
3691 #endif
3692 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
3693 subsize, &pc_tree->vertical[0], best_remain_rdcost, 0);
3694
3695 if (this_rdc.rate == INT_MAX) {
3696 sum_rdc.rdcost = INT64_MAX;
3697 } else {
3698 sum_rdc.rate += this_rdc.rate;
3699 sum_rdc.dist += this_rdc.dist;
3700 sum_rdc.rdcost += this_rdc.rdcost;
3701 }
3702 vert_rd[0] = this_rdc.rdcost;
3703 if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) {
3704 const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
3705 const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3706 // Neither palette mode nor cfl predicted
3707 if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3708 if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
3709 }
3710 update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col,
3711 subsize, 1);
3712 encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
3713 subsize, NULL);
3714
3715 if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3716
3717 if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3718 partition_none_allowed) {
3719 pc_tree->vertical[1].pred_interp_filter =
3720 av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3721 }
3722 pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
3723 PARTITION_VERT, subsize, &pc_tree->vertical[1],
3724 best_rdc.rdcost - sum_rdc.rdcost, 0);
3725 vert_rd[1] = this_rdc.rdcost;
3726
3727 if (this_rdc.rate == INT_MAX) {
3728 sum_rdc.rdcost = INT64_MAX;
3729 } else {
3730 sum_rdc.rate += this_rdc.rate;
3731 sum_rdc.dist += this_rdc.dist;
3732 sum_rdc.rdcost += this_rdc.rdcost;
3733 }
3734 }
3735 #if CONFIG_COLLECT_PARTITION_STATS
3736 if (partition_timer_on) {
3737 aom_usec_timer_mark(&partition_timer);
3738 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3739 partition_times[PARTITION_VERT] += time;
3740 partition_timer_on = 0;
3741 }
3742 #endif
3743
3744 if (sum_rdc.rdcost < best_rdc.rdcost) {
3745 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3746 if (sum_rdc.rdcost < best_rdc.rdcost) {
3747 best_rdc = sum_rdc;
3748 pc_tree->partitioning = PARTITION_VERT;
3749 }
3750 }
3751
3752 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3753 }
3754
3755 if (pb_source_variance == UINT_MAX) {
3756 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3757 if (is_cur_buf_hbd(xd)) {
3758 pb_source_variance = av1_high_get_sby_perpixel_variance(
3759 cpi, &x->plane[0].src, bsize, xd->bd);
3760 } else {
3761 pb_source_variance =
3762 av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3763 }
3764 }
3765
3766 if (use_pb_simple_motion_pred_sse(cpi) &&
3767 pb_simple_motion_pred_sse == UINT_MAX) {
3768 const MV ref_mv_full = { .row = 0, .col = 0 };
3769 unsigned int var = 0;
3770
3771 av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0,
3772 &pb_simple_motion_pred_sse, &var);
3773 }
3774
3775 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !do_rectangular_split));
3776
3777 const int ext_partition_allowed =
3778 do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
3779
3780 // The standard AB partitions are allowed whenever ext-partition-types are
3781 // allowed
3782 int horzab_partition_allowed =
3783 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3784 int vertab_partition_allowed =
3785 ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3786
3787 #if CONFIG_DIST_8X8
3788 if (x->using_dist_8x8) {
3789 if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
3790 horzab_partition_allowed = 0;
3791 vertab_partition_allowed = 0;
3792 }
3793 }
3794 #endif
3795
3796 if (cpi->sf.prune_ext_partition_types_search_level) {
3797 if (cpi->sf.prune_ext_partition_types_search_level == 1) {
3798 // TODO(debargha,huisu@google.com): may need to tune the threshold for
3799 // pb_source_variance.
3800 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3801 (pc_tree->partitioning == PARTITION_NONE &&
3802 pb_source_variance < 32) ||
3803 pc_tree->partitioning == PARTITION_SPLIT);
3804 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3805 (pc_tree->partitioning == PARTITION_NONE &&
3806 pb_source_variance < 32) ||
3807 pc_tree->partitioning == PARTITION_SPLIT);
3808 } else {
3809 horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3810 pc_tree->partitioning == PARTITION_SPLIT);
3811 vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3812 pc_tree->partitioning == PARTITION_SPLIT);
3813 }
3814 horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
3815 horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
3816 vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
3817 vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
3818 split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
3819 split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
3820 split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
3821 split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
3822 }
3823 int horza_partition_allowed = horzab_partition_allowed;
3824 int horzb_partition_allowed = horzab_partition_allowed;
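// Estimate each AB partition's cost from pieces already searched: e.g.
// HORZ_A consists of the two top split quadrants plus the bottom half, so
// horz_a_rd reuses split_rd[0], split_rd[1] and horz_rd[1]. A type is kept
// only if 14/16 (level 1) or 15/16 (level 2) of its estimate is still below
// the best RD cost found so far.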
3825 if (cpi->sf.prune_ext_partition_types_search_level) {
3826 const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
3827 const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
3828 switch (cpi->sf.prune_ext_partition_types_search_level) {
3829 case 1:
3830 horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
3831 horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
3832 break;
3833 case 2:
3834 default:
3835 horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
3836 horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
3837 break;
3838 }
3839 }
3840
3841 int verta_partition_allowed = vertab_partition_allowed;
3842 int vertb_partition_allowed = vertab_partition_allowed;
3843 if (cpi->sf.prune_ext_partition_types_search_level) {
3844 const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
3845 const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
3846 switch (cpi->sf.prune_ext_partition_types_search_level) {
3847 case 1:
3848 verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
3849 vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
3850 break;
3851 case 2:
3852 default:
3853 verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
3854 vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
3855 break;
3856 }
3857 }
3858
3859 if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
3860 partition_horz_allowed && partition_vert_allowed) {
3861 // TODO(huisu@google.com): x->source_variance may not be the current
3862 // block's variance. The correct one to use is pb_source_variance. Need to
3863 // re-train the model to fix it.
3864 ml_prune_ab_partition(bsize, pc_tree->partitioning,
3865 get_unsigned_bits(x->source_variance),
3866 best_rdc.rdcost, horz_rd, vert_rd, split_rd,
3867 &horza_partition_allowed, &horzb_partition_allowed,
3868 &verta_partition_allowed, &vertb_partition_allowed);
3869 }
3870
3871 horza_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3872 horzb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3873 verta_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3874 vertb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3875
3876 // PARTITION_HORZ_A
3877 if (!terminate_partition_search && partition_horz_allowed &&
3878 horza_partition_allowed && !is_gt_max_sq_part) {
3879 subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
3880 pc_tree->horizontala[0].rd_mode_is_ready = 0;
3881 pc_tree->horizontala[1].rd_mode_is_ready = 0;
3882 pc_tree->horizontala[2].rd_mode_is_ready = 0;
3883 if (split_ctx_is_ready[0]) {
3884 av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
3885 pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
3886 pc_tree->horizontala[0].rd_mode_is_ready = 1;
3887 if (split_ctx_is_ready[1]) {
3888 av1_copy_tree_context(&pc_tree->horizontala[1],
3889 &pc_tree->split[1]->none);
3890 pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
3891 pc_tree->horizontala[1].rd_mode_is_ready = 1;
3892 }
3893 }
3894 #if CONFIG_COLLECT_PARTITION_STATS
3895 {
3896 RD_STATS tmp_sum_rdc;
3897 av1_init_rd_stats(&tmp_sum_rdc);
3898 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
3899 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3900 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3901 partition_attempts[PARTITION_HORZ_A] += 1;
3902 aom_usec_timer_start(&partition_timer);
3903 partition_timer_on = 1;
3904 }
3905 }
3906 #endif
3907 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3908 pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
3909 PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
3910 mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
3911 subsize);
3912 #if CONFIG_COLLECT_PARTITION_STATS
3913 if (partition_timer_on) {
3914 aom_usec_timer_mark(&partition_timer);
3915 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3916 partition_times[PARTITION_HORZ_A] += time;
3917 partition_timer_on = 0;
3918 }
3919 #endif
3920 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3921 }
3922 // PARTITION_HORZ_B
3923 if (!terminate_partition_search && partition_horz_allowed &&
3924 horzb_partition_allowed && !is_gt_max_sq_part) {
3925 subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
3926 pc_tree->horizontalb[0].rd_mode_is_ready = 0;
3927 pc_tree->horizontalb[1].rd_mode_is_ready = 0;
3928 pc_tree->horizontalb[2].rd_mode_is_ready = 0;
3929 if (horz_ctx_is_ready) {
3930 av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
3931 pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
3932 pc_tree->horizontalb[0].rd_mode_is_ready = 1;
3933 }
3934 #if CONFIG_COLLECT_PARTITION_STATS
3935 {
3936 RD_STATS tmp_sum_rdc;
3937 av1_init_rd_stats(&tmp_sum_rdc);
3938 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
3939 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3940 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3941 partition_attempts[PARTITION_HORZ_B] += 1;
3942 aom_usec_timer_start(&partition_timer);
3943 partition_timer_on = 1;
3944 }
3945 }
3946 #endif
3947 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3948 pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
3949 PARTITION_HORZ_B, mi_row, mi_col, subsize,
3950 mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
3951 mi_col + mi_step, bsize2);
3952
3953 #if CONFIG_COLLECT_PARTITION_STATS
3954 if (partition_timer_on) {
3955 aom_usec_timer_mark(&partition_timer);
3956 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3957 partition_times[PARTITION_HORZ_B] += time;
3958 partition_timer_on = 0;
3959 }
3960 #endif
3961 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3962 }
3963
3964 // PARTITION_VERT_A
3965 if (!terminate_partition_search && partition_vert_allowed &&
3966 verta_partition_allowed && !is_gt_max_sq_part) {
3967 subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
3968 pc_tree->verticala[0].rd_mode_is_ready = 0;
3969 pc_tree->verticala[1].rd_mode_is_ready = 0;
3970 pc_tree->verticala[2].rd_mode_is_ready = 0;
3971 if (split_ctx_is_ready[0]) {
3972 av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
3973 pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
3974 pc_tree->verticala[0].rd_mode_is_ready = 1;
3975 }
3976 #if CONFIG_COLLECT_PARTITION_STATS
3977 {
3978 RD_STATS tmp_sum_rdc;
3979 av1_init_rd_stats(&tmp_sum_rdc);
3980 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
3981 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3982 if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3983 partition_attempts[PARTITION_VERT_A] += 1;
3984 aom_usec_timer_start(&partition_timer);
3985 partition_timer_on = 1;
3986 }
3987 }
3988 #endif
3989 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3990 pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
3991 PARTITION_VERT_A, mi_row, mi_col, bsize2,
3992 mi_row + mi_step, mi_col, bsize2, mi_row,
3993 mi_col + mi_step, subsize);
3994 #if CONFIG_COLLECT_PARTITION_STATS
3995 if (partition_timer_on) {
3996 aom_usec_timer_mark(&partition_timer);
3997 int64_t time = aom_usec_timer_elapsed(&partition_timer);
3998 partition_times[PARTITION_VERT_A] += time;
3999 partition_timer_on = 0;
4000 }
4001 #endif
4002 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4003 }
4004 // PARTITION_VERT_B
4005 if (!terminate_partition_search && partition_vert_allowed &&
4006 vertb_partition_allowed && !is_gt_max_sq_part) {
4007 subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
4008 pc_tree->verticalb[0].rd_mode_is_ready = 0;
4009 pc_tree->verticalb[1].rd_mode_is_ready = 0;
4010 pc_tree->verticalb[2].rd_mode_is_ready = 0;
4011 if (vert_ctx_is_ready) {
4012 av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
4013 pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
4014 pc_tree->verticalb[0].rd_mode_is_ready = 1;
4015 }
4016 #if CONFIG_COLLECT_PARTITION_STATS
4017 {
4018 RD_STATS tmp_sum_rdc;
4019 av1_init_rd_stats(&tmp_sum_rdc);
4020 tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
4021 tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
4022 if (!frame_is_intra_only(cm) &&
4023 best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
4024 partition_attempts[PARTITION_VERT_B] += 1;
4025 aom_usec_timer_start(&partition_timer);
4026 partition_timer_on = 1;
4027 }
4028 }
4029 #endif
4030 rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
4031 pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
4032 PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
4033 mi_col + mi_step, bsize2, mi_row + mi_step,
4034 mi_col + mi_step, bsize2);
4035 #if CONFIG_COLLECT_PARTITION_STATS
4036 if (partition_timer_on) {
4037 aom_usec_timer_mark(&partition_timer);
4038 int64_t time = aom_usec_timer_elapsed(&partition_timer);
4039 partition_times[PARTITION_VERT_B] += time;
4040 partition_timer_on = 0;
4041 }
4042 #endif
4043 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4044 }
4045
4046 // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
4047 // PARTITION_VERT_4 for this block. This is almost the same as
4048 // ext_partition_allowed, except that we don't allow 128x32 or 32x128
4049 // blocks, so we require that bsize is not BLOCK_128X128.
4050 const int partition4_allowed = cpi->oxcf.enable_1to4_partitions &&
4051 ext_partition_allowed &&
4052 bsize != BLOCK_128X128;
4053
4054 int partition_horz4_allowed = partition4_allowed && partition_horz_allowed;
4055 int partition_vert4_allowed = partition4_allowed && partition_vert_allowed;
4056 if (cpi->sf.prune_ext_partition_types_search_level == 2) {
4057 partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
4058 pc_tree->partitioning == PARTITION_HORZ_A ||
4059 pc_tree->partitioning == PARTITION_HORZ_B ||
4060 pc_tree->partitioning == PARTITION_SPLIT ||
4061 pc_tree->partitioning == PARTITION_NONE);
4062 partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
4063 pc_tree->partitioning == PARTITION_VERT_A ||
4064 pc_tree->partitioning == PARTITION_VERT_B ||
4065 pc_tree->partitioning == PARTITION_SPLIT ||
4066 pc_tree->partitioning == PARTITION_NONE);
4067 }
4068 if (cpi->sf.ml_prune_4_partition && partition4_allowed &&
4069 partition_horz_allowed && partition_vert_allowed) {
4070 ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost,
4071 horz_rd, vert_rd, split_rd, &partition_horz4_allowed,
4072 &partition_vert4_allowed, pb_source_variance, mi_row,
4073 mi_col);
4074 }
4075
4076 #if CONFIG_DIST_8X8
4077 if (x->using_dist_8x8) {
4078 if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
4079 partition_horz4_allowed = 0;
4080 partition_vert4_allowed = 0;
4081 }
4082 }
4083 #endif
4084
4085 if (blksize < (min_partition_size << 2)) {
4086 partition_horz4_allowed = 0;
4087 partition_vert4_allowed = 0;
4088 }
4089
4090 // PARTITION_HORZ_4
4091 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
4092 if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
4093 (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
4094 !is_gt_max_sq_part) {
4095 av1_init_rd_stats(&sum_rdc);
4096 const int quarter_step = mi_size_high[bsize] / 4;
4097 PICK_MODE_CONTEXT *ctx_prev = ctx_none;
4098
4099 subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
4100 sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
4101 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4102
4103 #if CONFIG_COLLECT_PARTITION_STATS
4104 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
4105 partition_attempts[PARTITION_HORZ_4] += 1;
4106 aom_usec_timer_start(&partition_timer);
4107 partition_timer_on = 1;
4108 }
4109 #endif
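// Search the four 4:1 strips in order, chaining each strip's context into
// the next via ctx_prev; rd_try_subblock returns 0 to terminate the loop
// early (e.g. when the accumulated cost already exceeds best_rdc).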
4110 for (int i = 0; i < 4; ++i) {
4111 const int this_mi_row = mi_row + i * quarter_step;
4112
4113 if (i > 0 && this_mi_row >= cm->mi_rows) break;
4114
4115 PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
4116
4117 ctx_this->rd_mode_is_ready = 0;
4118 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
4119 mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
4120 PARTITION_HORZ_4, ctx_prev, ctx_this))
4121 break;
4122
4123 ctx_prev = ctx_this;
4124 }
4125
4126 if (sum_rdc.rdcost < best_rdc.rdcost) {
4127 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4128 if (sum_rdc.rdcost < best_rdc.rdcost) {
4129 best_rdc = sum_rdc;
4130 pc_tree->partitioning = PARTITION_HORZ_4;
4131 }
4132 }
4133
4134 #if CONFIG_COLLECT_PARTITION_STATS
4135 if (partition_timer_on) {
4136 aom_usec_timer_mark(&partition_timer);
4137 int64_t time = aom_usec_timer_elapsed(&partition_timer);
4138 partition_times[PARTITION_HORZ_4] += time;
4139 partition_timer_on = 0;
4140 }
4141 #endif
4142 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4143 }
4144
4145 // PARTITION_VERT_4
4146 assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
4147 if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
4148 (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
4149 !is_gt_max_sq_part) {
4150 av1_init_rd_stats(&sum_rdc);
4151 const int quarter_step = mi_size_wide[bsize] / 4;
4152 PICK_MODE_CONTEXT *ctx_prev = ctx_none;
4153
4154 subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
4155 sum_rdc.rate = partition_cost[PARTITION_VERT_4];
4156 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4157
4158 #if CONFIG_COLLECT_PARTITION_STATS
4159 if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
4160 partition_attempts[PARTITION_VERT_4] += 1;
4161 aom_usec_timer_start(&partition_timer);
4162 partition_timer_on = 1;
4163 }
4164 #endif
4165 for (int i = 0; i < 4; ++i) {
4166 const int this_mi_col = mi_col + i * quarter_step;
4167
4168 if (i > 0 && this_mi_col >= cm->mi_cols) break;
4169
4170 PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
4171
4172 ctx_this->rd_mode_is_ready = 0;
4173 if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
4174 this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
4175 PARTITION_VERT_4, ctx_prev, ctx_this))
4176 break;
4177
4178 ctx_prev = ctx_this;
4179 }
4180
4181 if (sum_rdc.rdcost < best_rdc.rdcost) {
4182 sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4183 if (sum_rdc.rdcost < best_rdc.rdcost) {
4184 best_rdc = sum_rdc;
4185 pc_tree->partitioning = PARTITION_VERT_4;
4186 }
4187 }
4188 #if CONFIG_COLLECT_PARTITION_STATS
4189 if (partition_timer_on) {
4190 aom_usec_timer_mark(&partition_timer);
4191 int64_t time = aom_usec_timer_elapsed(&partition_timer);
4192 partition_times[PARTITION_VERT_4] += time;
4193 partition_timer_on = 0;
4194 }
4195 #endif
4196 restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4197 }
4198
4199 if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) {
4200 // Did not find a valid partition; go back and search again with fewer
4201 // constraints on which partition types to search.
4202 x->must_find_valid_partition = 1;
4203 #if CONFIG_COLLECT_PARTITION_STATS == 2
4204 part_stats->partition_redo += 1;
4205 #endif
4206 goto BEGIN_PARTITION_SEARCH;
4207 }
4208
4209 // TODO(jbb): This code was added so that we avoid a static-analysis
4210 // warning related to the fact that best_rd isn't used after this
4211 // point. It should be refactored so that the duplicate checks occur
4212 // in some sub-function and are thus used...
4213 (void)best_rd;
4214 *rd_cost = best_rdc;
4215
4216 #if CONFIG_COLLECT_PARTITION_STATS
4217 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
4218 partition_decisions[pc_tree->partitioning] += 1;
4219 }
4220 #endif
4221
4222 #if CONFIG_COLLECT_PARTITION_STATS == 1
4223 // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
4224 // prediction block
4225 FILE *f = fopen("data.csv", "a");
4226 fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
4227 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4228 fprintf(f, "%d,", partition_decisions[idx]);
4229 }
4230 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4231 fprintf(f, "%d,", partition_attempts[idx]);
4232 }
4233 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4234 fprintf(f, "%ld,", partition_times[idx]);
4235 }
4236 fprintf(f, "\n");
4237 fclose(f);
4238 #endif
4239
4240 #if CONFIG_COLLECT_PARTITION_STATS == 2
4241 // If CONFIG_COLLECT_PARTITION_STATS is 2, then we print out the stats for
4242 // the whole clip, so we need to pass the information upstream to the encoder.
4243 const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
4244 int *agg_attempts = part_stats->partition_attempts[bsize_idx];
4245 int *agg_decisions = part_stats->partition_decisions[bsize_idx];
4246 int64_t *agg_times = part_stats->partition_times[bsize_idx];
4247 for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4248 agg_attempts[idx] += partition_attempts[idx];
4249 agg_decisions[idx] += partition_decisions[idx];
4250 agg_times[idx] += partition_times[idx];
4251 }
4252 #endif
4253
4254 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
4255 pc_tree->index != 3) {
4256 if (bsize == cm->seq_params.sb_size) {
4257 x->cb_offset = 0;
4258 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4259 pc_tree, NULL);
4260 } else {
4261 encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
4262 pc_tree, NULL);
4263 }
4264 }
4265
4266 if (bsize == cm->seq_params.sb_size) {
4267 assert(best_rdc.rate < INT_MAX);
4268 assert(best_rdc.dist < INT64_MAX);
4269 } else {
4270 assert(tp_orig == *tp);
4271 }
4272 }
4273 #undef NUM_SIMPLE_MOTION_FEATURES
4274
4275 // Set all the counters to their maximum values.
4276 static void init_first_partition_pass_stats_tables(
4277 AV1_COMP *cpi, FIRST_PARTITION_PASS_STATS *stats) {
4278 for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4279 memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts));
4280 memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts));
4281 stats[i].sample_counts = INT_MAX;
4282 if (cpi->sf.use_first_partition_pass_interintra_stats)
4283 memset(stats[i].interintra_motion_mode_count, 0xff,
4284 sizeof(stats[i].interintra_motion_mode_count));
4285 }
4286 }
4287
4288 // Minimum number of samples needed to trigger the mode pruning in the
4289 // two_pass_partition_search feature.
4290 #define FIRST_PARTITION_PASS_MIN_SAMPLES 16
4291
4292 static int get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
4293 int mi_col, int orig_rdmult) {
4294 TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
4295 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
4296 int tpl_stride = tpl_frame->stride;
4297 int64_t intra_cost = 0;
4298 int64_t mc_dep_cost = 0;
4299 int mi_wide = mi_size_wide[bsize];
4300 int mi_high = mi_size_high[bsize];
4301 int row, col;
4302
4303 int dr = 0;
4304 double r0, rk, beta;
4305
4306 if (tpl_frame->is_valid == 0) return orig_rdmult;
4307
4308 if (cpi->common.show_frame) return orig_rdmult;
4309
4310 if (cpi->twopass.gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
4311
4312 for (row = mi_row; row < mi_row + mi_high; ++row) {
4313 for (col = mi_col; col < mi_col + mi_wide; ++col) {
4314 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
4315
4316 if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
4317
4318 intra_cost += this_stats->intra_cost;
4319 mc_dep_cost += this_stats->mc_dep_cost;
4320 }
4321 }
4322
4323 aom_clear_system_state();
4324
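// beta compares the frame-level ratio r0 against this block's ratio rk of
// intra cost to motion-compensated dependency cost. Blocks whose quality
// propagates further (larger mc_dep_cost, hence smaller rk) get a larger
// beta and a lower rdmult, spending more bits where they pay off later; the
// result is clamped to [orig_rdmult / 2, orig_rdmult * 3 / 2].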
4325 r0 = cpi->rd.r0;
4326 rk = (double)intra_cost / mc_dep_cost;
4327 beta = r0 / rk;
4328 dr = av1_get_adaptive_rdmult(cpi, beta);
4329
4330 dr = AOMMIN(dr, orig_rdmult * 3 / 2);
4331 dr = AOMMAX(dr, orig_rdmult * 1 / 2);
4332
4333 dr = AOMMAX(1, dr);
4334
4335 return dr;
4336 }
4337
4338 static void setup_delta_q(AV1_COMP *const cpi, MACROBLOCK *const x,
4339 const TileInfo *const tile_info, int mi_row,
4340 int mi_col, int num_planes) {
4341 AV1_COMMON *const cm = &cpi->common;
4342 MACROBLOCKD *const xd = &x->e_mbd;
4343 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
4344 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4345 const int mib_size = cm->seq_params.mib_size;
4346
4347 // Delta-q modulation based on variance
4348 av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
4349
4350 int offset_qindex;
4351 if (DELTAQ_MODULATION == 1) {
4352 const int block_wavelet_energy_level =
4353 av1_block_wavelet_energy_level(cpi, x, sb_size);
4354 x->sb_energy_level = block_wavelet_energy_level;
4355 offset_qindex =
4356 av1_compute_deltaq_from_energy_level(cpi, block_wavelet_energy_level);
4357 } else {
4358 const int block_var_level = av1_log_block_var(cpi, x, sb_size);
4359 x->sb_energy_level = block_var_level;
4360 offset_qindex = av1_compute_deltaq_from_energy_level(cpi, block_var_level);
4361 }
4362 const int qmask = ~(delta_q_info->delta_q_res - 1);
4363 int current_qindex =
4364 clamp(cm->base_qindex + offset_qindex, delta_q_info->delta_q_res,
4365 256 - delta_q_info->delta_q_res);
4366 current_qindex =
4367 ((current_qindex - cm->base_qindex + delta_q_info->delta_q_res / 2) &
4368 qmask) +
4369 cm->base_qindex;
4370 assert(current_qindex > 0);
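  // Worked example (illustrative): with base_qindex = 80, offset_qindex = 13
  // and delta_q_res = 4, the clamp yields 93 and the rounding step computes
  // ((93 - 80 + 2) & ~3) + 80 = 92, so the signalled delta (12) is a
  // multiple of delta_q_res as required for coding delta_q.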
4371
4372 xd->delta_qindex = current_qindex - cm->base_qindex;
4373 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4374 xd->mi[0]->current_qindex = current_qindex;
4375 av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
4376 if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
4377 const int lfmask = ~(delta_q_info->delta_lf_res - 1);
4378 const int delta_lf_from_base =
4379 ((offset_qindex / 2 + delta_q_info->delta_lf_res / 2) & lfmask);
4380
4381     // Pre-set the delta lf for the loop filter. Note that this value is set
4382     // before mi is assigned for each block in the current superblock.
4383 for (int j = 0; j < AOMMIN(mib_size, cm->mi_rows - mi_row); j++) {
4384 for (int k = 0; k < AOMMIN(mib_size, cm->mi_cols - mi_col); k++) {
4385 cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)].delta_lf_from_base =
4386 clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
4387 const int frame_lf_count =
4388 av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
4389 for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
4390 cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)].delta_lf[lf_id] =
4391 clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
4392 }
4393 }
4394 }
4395 }
4396 }
4397
4398 // First pass of partition search only considers square partition block sizes.
4399 // The results will be used in the second partition search pass to prune
4400 // unlikely partition candidates.
4401 static void first_partition_search_pass(AV1_COMP *cpi, ThreadData *td,
4402 TileDataEnc *tile_data, int mi_row,
4403 int mi_col, TOKENEXTRA **tp) {
4404 MACROBLOCK *const x = &td->mb;
4405 x->cb_partition_scan = 1;
4406
4407 const SPEED_FEATURES *const sf = &cpi->sf;
4408 // Reset the stats tables.
4409 av1_zero(x->first_partition_pass_stats);
4410
4411 AV1_COMMON *const cm = &cpi->common;
4412 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4413 const int mib_size_log2 = cm->seq_params.mib_size_log2;
4414 PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
4415 RD_STATS dummy_rdc;
4416 rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4417 &dummy_rdc, INT64_MAX, pc_root, NULL);
4418 x->cb_partition_scan = 0;
4419
4420 x->source_variance = UINT_MAX;
4421 x->simple_motion_pred_sse = UINT_MAX;
4422 if (sf->adaptive_pred_interp_filter) {
4423 const int leaf_nodes = 256;
4424 for (int i = 0; i < leaf_nodes; ++i) {
4425 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4426 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4427 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4428 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4429 }
4430 }
4431
4432 x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
4433 av1_zero(x->txb_rd_record_8X8);
4434 av1_zero(x->txb_rd_record_16X16);
4435 av1_zero(x->txb_rd_record_32X32);
4436 av1_zero(x->txb_rd_record_64X64);
4437 av1_zero(x->txb_rd_record_intra);
4438 av1_zero(x->pred_mv);
4439 pc_root->index = 0;
4440
4441 for (int idy = 0; idy < mi_size_high[sb_size]; ++idy) {
4442 for (int idx = 0; idx < mi_size_wide[sb_size]; ++idx) {
4443 const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx);
4444 cm->mi_grid_visible[offset] = 0;
4445 }
4446 }
4447
4448 x->use_cb_search_range = 1;
4449
4450 for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4451 FIRST_PARTITION_PASS_STATS *const stat = &x->first_partition_pass_stats[i];
4452 if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
4453       // If not enough samples were collected, keep all modes available.
4454 memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
4455 memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
4456 if (cpi->sf.use_first_partition_pass_interintra_stats)
4457 memset(stat->interintra_motion_mode_count, 0xff,
4458 sizeof(stat->interintra_motion_mode_count));
4459 } else if (sf->selective_ref_frame < 3) {
4460 // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
4461 // initial partition scan, so we don't eliminate them.
4462 stat->ref0_counts[ALTREF2_FRAME] = 0xff;
4463 stat->ref1_counts[ALTREF2_FRAME] = 0xff;
4464 stat->ref0_counts[BWDREF_FRAME] = 0xff;
4465 stat->ref1_counts[BWDREF_FRAME] = 0xff;
4466 if (cpi->sf.use_first_partition_pass_interintra_stats) {
4467 stat->interintra_motion_mode_count[ALTREF2_FRAME] = 0xff;
4468 stat->interintra_motion_mode_count[BWDREF_FRAME] = 0xff;
4469 }
4470 }
4471 }
4472 }
4473
4474 #define AVG_CDF_WEIGHT_LEFT 3
4475 #define AVG_CDF_WEIGHT_TOP_RIGHT 1
4476
4477 static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr,
4478 int num_cdfs, int cdf_stride, int nsymbs,
4479 int wt_left, int wt_tr) {
4480 for (int i = 0; i < num_cdfs; i++) {
4481 for (int j = 0; j <= nsymbs; j++) {
4482 cdf_ptr_left[i * cdf_stride + j] =
4483 (aom_cdf_prob)(((int)cdf_ptr_left[i * cdf_stride + j] * wt_left +
4484 (int)cdf_ptr_tr[i * cdf_stride + j] * wt_tr +
4485 ((wt_left + wt_tr) / 2)) /
4486 (wt_left + wt_tr));
4487 assert(cdf_ptr_left[i * cdf_stride + j] >= 0 &&
4488 cdf_ptr_left[i * cdf_stride + j] < CDF_PROB_TOP);
4489 }
4490 }
4491 }
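// Worked example (illustrative): with wt_left = 3, wt_tr = 1 and CDF entries
// left = 12000, tr = 20000, avg_cdf_symbol() computes
// (12000 * 3 + 20000 * 1 + 2) / 4 = 14000 with integer division, biasing the
// merged probability toward the left neighbour, per AVG_CDF_WEIGHT_LEFT and
// AVG_CDF_WEIGHT_TOP_RIGHT.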
4492
4493 #define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
4494 AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))
4495
4496 #define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride) \
4497 do { \
4498 aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left; \
4499 aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr; \
4500 int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob); \
4501 int num_cdfs = array_size / cdf_stride; \
4502 avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \
4503 wt_left, wt_tr); \
4504 } while (0)
4505
4506 static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left,
4507 int wt_tr) {
4508 AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);
4509 for (int i = 0; i < 2; i++) {
4510 AVERAGE_CDF(nmv_left->comps[i].classes_cdf, nmv_tr->comps[i].classes_cdf,
4511 MV_CLASSES);
4512 AVERAGE_CDF(nmv_left->comps[i].class0_fp_cdf,
4513 nmv_tr->comps[i].class0_fp_cdf, MV_FP_SIZE);
4514 AVERAGE_CDF(nmv_left->comps[i].fp_cdf, nmv_tr->comps[i].fp_cdf, MV_FP_SIZE);
4515 AVERAGE_CDF(nmv_left->comps[i].sign_cdf, nmv_tr->comps[i].sign_cdf, 2);
4516 AVERAGE_CDF(nmv_left->comps[i].class0_hp_cdf,
4517 nmv_tr->comps[i].class0_hp_cdf, 2);
4518 AVERAGE_CDF(nmv_left->comps[i].hp_cdf, nmv_tr->comps[i].hp_cdf, 2);
4519 AVERAGE_CDF(nmv_left->comps[i].class0_cdf, nmv_tr->comps[i].class0_cdf,
4520 CLASS0_SIZE);
4521 AVERAGE_CDF(nmv_left->comps[i].bits_cdf, nmv_tr->comps[i].bits_cdf, 2);
4522 }
4523 }
4524
4525 // In case of row-based multi-threading of the encoder, since we always
4526 // keep a top-right sync, we can average the top-right SB's CDFs and the
4527 // left SB's CDFs and use the result for the current SB's encoding to
4528 // improve performance. This function facilitates the averaging of CDFs and
4529 // is used only when row-mt is enabled in the encoder.
4530 static void avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
4531 int wt_left, int wt_tr) {
4532 AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
4533 AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
4534 AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
4535 AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
4536 AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
4537 AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
4538 AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
4539 AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
4540 AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
4541 AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
4542 AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
4543 AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
4544 AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE);
4545 AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2);
4546 AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2);
4547 AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2);
4548 AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2);
4549 AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
4550 ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
4551 AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf,
4552 MASKED_COMPOUND_TYPES);
4553 AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16);
4554 AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2);
4555 AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2);
4556 AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf,
4557 INTERINTRA_MODES);
4558 AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES);
4559 AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2);
4560 AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf,
4561 PALETTE_SIZES);
4562 AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf,
4563 PALETTE_SIZES);
4564 for (int j = 0; j < PALETTE_SIZES; j++) {
4565 int nsymbs = j + PALETTE_MIN_SIZE;
4566 AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j],
4567 ctx_tr->palette_y_color_index_cdf[j], nsymbs,
4568 CDF_SIZE(PALETTE_COLORS));
4569 AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j],
4570 ctx_tr->palette_uv_color_index_cdf[j], nsymbs,
4571 CDF_SIZE(PALETTE_COLORS));
4572 }
4573 AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2);
4574 AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2);
4575 AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2);
4576 AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2);
4577 AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2);
4578 AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2);
4579 AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2);
4580 AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2);
4581 AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2);
4582 AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2);
4583 AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
4584 AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
4585 AVERAGE_CDF(ctx_left->skip_cdfs, ctx_tr->skip_cdfs, 2);
4586 AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
4587 avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
4588 avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
4589 AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
4590 AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS);
4591 AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
4592 AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
4593 ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
4594 AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2);
4595 AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf,
4596 FILTER_INTRA_MODES);
4597 AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf,
4598 RESTORE_SWITCHABLE_TYPES);
4599 AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2);
4600 AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2);
4601 AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES);
4602 AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
4603 UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
4604 AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
4605 for (int i = 0; i < PARTITION_CONTEXTS; i++) {
4606 if (i < 4) {
4607 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4,
4608 CDF_SIZE(10));
4609 } else if (i < 16) {
4610 AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10);
4611 } else {
4612 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8,
4613 CDF_SIZE(10));
4614 }
4615 }
4616 AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf,
4617 SWITCHABLE_FILTERS);
4618 AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES);
4619 AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf,
4620 2 * MAX_ANGLE_DELTA + 1);
4621 AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH,
4622 CDF_SIZE(MAX_TX_DEPTH + 1));
4623 AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1],
4624 MAX_TX_DEPTH + 1);
4625 AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2],
4626 MAX_TX_DEPTH + 1);
4627 AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3],
4628 MAX_TX_DEPTH + 1);
4629 AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1);
4630 AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1);
4631 for (int i = 0; i < FRAME_LF_COUNT; i++) {
4632 AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i],
4633 DELTA_LF_PROBS + 1);
4634 }
4635 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7,
4636 CDF_SIZE(TX_TYPES));
4637 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5,
4638 CDF_SIZE(TX_TYPES));
4639 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
4640 CDF_SIZE(TX_TYPES));
4641 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
4642 CDF_SIZE(TX_TYPES));
4643 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2,
4644 CDF_SIZE(TX_TYPES));
4645 AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS);
4646 AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf,
4647 CFL_ALPHABET_SIZE);
4648 }
4649
4650 static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
4651 int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) {
4652 AV1_COMMON *const cm = &cpi->common;
4653 const int num_planes = av1_num_planes(cm);
4654 const TileInfo *const tile_info = &tile_data->tile_info;
4655 MACROBLOCK *const x = &td->mb;
4656 MACROBLOCKD *const xd = &x->e_mbd;
4657 const SPEED_FEATURES *const sf = &cpi->sf;
4658 const int leaf_nodes = 256;
4659 const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
4660 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4661 const int mib_size = cm->seq_params.mib_size;
4662 const int mib_size_log2 = cm->seq_params.mib_size_log2;
4663 const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
4664
4665 #if CONFIG_COLLECT_COMPONENT_TIMING
4666 start_timing(cpi, encode_sb_time);
4667 #endif
4668
4669 // Initialize the left context for the new SB row
4670 av1_zero_left_context(xd);
4671
4672 // Reset delta for every tile
4673 if (mi_row == tile_info->mi_row_start) {
4674 if (cm->delta_q_info.delta_q_present_flag)
4675 xd->current_qindex = cm->base_qindex;
4676 if (cm->delta_q_info.delta_lf_present_flag) {
4677 av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
4678 }
4679 }
4680
4681 // Code each SB in the row
4682 for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
4683 mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
4684 (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4685 sb_col_in_tile);
4686 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4687 (tile_info->mi_row_start != mi_row)) {
4688       if (tile_info->mi_col_start == mi_col) {
4689         // Restore the frame context of the first-column SB.
4690 memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
4691 } else {
4692 int wt_left = AVG_CDF_WEIGHT_LEFT;
4693 int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
4694 if (tile_info->mi_col_end > (mi_col + mib_size))
4695 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile, wt_left,
4696 wt_tr);
4697 else
4698 avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
4699 wt_left, wt_tr);
4700 }
4701 }
4702
4703 switch (cpi->oxcf.coeff_cost_upd_freq) {
4704 case COST_UPD_TILE: // Tile level
4705 if (mi_row != tile_info->mi_row_start) break;
4706 AOM_FALLTHROUGH_INTENDED;
4707 case COST_UPD_SBROW: // SB row level in tile
4708 if (mi_col != tile_info->mi_col_start) break;
4709 AOM_FALLTHROUGH_INTENDED;
4710 case COST_UPD_SB: // SB level
4711 av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
4712 break;
4713 default: assert(0);
4714 }
4715
4716 switch (cpi->oxcf.mode_cost_upd_freq) {
4717 case COST_UPD_TILE: // Tile level
4718 if (mi_row != tile_info->mi_row_start) break;
4719 AOM_FALLTHROUGH_INTENDED;
4720 case COST_UPD_SBROW: // SB row level in tile
4721 if (mi_col != tile_info->mi_col_start) break;
4722 AOM_FALLTHROUGH_INTENDED;
4723 case COST_UPD_SB: // SB level
4724 av1_fill_mode_rates(cm, x, xd->tile_ctx);
4725 break;
4726 default: assert(0);
4727 }
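    // The two switches above share a cascade: COST_UPD_TILE falls through to
    // COST_UPD_SBROW, which falls through to COST_UPD_SB. A tile-level
    // setting therefore refreshes the tables only at the first SB of the
    // tile, a row-level setting at the first SB of each SB row, and an
    // SB-level setting at every SB.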
4728
4729 if (sf->adaptive_pred_interp_filter) {
4730 for (int i = 0; i < leaf_nodes; ++i) {
4731 td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4732 td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4733 td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4734 td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4735 }
4736 }
4737
4738 x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
4739
4740 if (!use_nonrd_mode) {
4741 av1_zero(x->txb_rd_record_8X8);
4742 av1_zero(x->txb_rd_record_16X16);
4743 av1_zero(x->txb_rd_record_32X32);
4744 av1_zero(x->txb_rd_record_64X64);
4745 av1_zero(x->txb_rd_record_intra);
4746 }
4747
4748 av1_zero(x->picked_ref_frames_mask);
4749
4750 av1_zero(x->pred_mv);
4751 PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
4752 pc_root->index = 0;
4753
4754 if ((sf->simple_motion_search_prune_rect ||
4755 sf->simple_motion_search_early_term_none ||
4756 sf->firstpass_simple_motion_search_early_term) &&
4757 !frame_is_intra_only(cm)) {
4758 init_simple_motion_search_mvs(pc_root);
4759 }
4760
4761 const struct segmentation *const seg = &cm->seg;
4762 int seg_skip = 0;
4763 if (seg->enabled) {
4764 const uint8_t *const map =
4765 seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
4766 const int segment_id =
4767 map ? get_segment_id(cm, map, sb_size, mi_row, mi_col) : 0;
4768 seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4769 }
4770 xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv;
4771
4772 x->sb_energy_level = 0;
4773 if (cm->delta_q_info.delta_q_present_flag)
4774 setup_delta_q(cpi, x, tile_info, mi_row, mi_col, num_planes);
4775
4776 int dummy_rate;
4777 int64_t dummy_dist;
4778 RD_STATS dummy_rdc;
4779 const int idx_str = cm->mi_stride * mi_row + mi_col;
4780 MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
4781 x->source_variance = UINT_MAX;
4782 x->simple_motion_pred_sse = UINT_MAX;
4783 if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
4784 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4785 const BLOCK_SIZE bsize = seg_skip ? sb_size : sf->always_this_block_size;
4786 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4787 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4788 &dummy_rate, &dummy_dist, 1, pc_root);
4789 } else if (cpi->partition_search_skippable_frame) {
4790 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4791 const BLOCK_SIZE bsize =
4792 get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4793 set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4794 rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4795 &dummy_rate, &dummy_dist, 1, pc_root);
4796 } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
4797 use_nonrd_mode) {
4798 set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4799 av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col);
4800 nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4801 &dummy_rate, &dummy_dist, 1, pc_root);
4802
4803 } else {
4804 const int orig_rdmult = cpi->rd.RDMULT;
4805 x->cb_rdmult = orig_rdmult;
4806 if (cpi->twopass.gf_group.index > 0 && cpi->oxcf.enable_tpl_model &&
4807 cpi->oxcf.aq_mode == NO_AQ && cpi->oxcf.deltaq_mode == 0) {
4808 const int dr =
4809 get_rdmult_delta(cpi, BLOCK_128X128, mi_row, mi_col, orig_rdmult);
4810
4811 x->cb_rdmult = dr;
4812 x->rdmult = x->cb_rdmult;
4813 }
4814
4815 reset_partition(pc_root, sb_size);
4816 x->use_cb_search_range = 0;
4817 #if CONFIG_COLLECT_COMPONENT_TIMING
4818 start_timing(cpi, first_partition_search_pass_time);
4819 #endif
4820 init_first_partition_pass_stats_tables(cpi,
4821 x->first_partition_pass_stats);
4822       // Do the first pass if we need two-pass partition search.
4823 if (cpi->two_pass_partition_search &&
4824 cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
4825 mi_row + mi_size_high[sb_size] <= cm->mi_rows &&
4826 mi_col + mi_size_wide[sb_size] <= cm->mi_cols &&
4827 cm->current_frame.frame_type != KEY_FRAME) {
4828 first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp);
4829 }
4830 #if CONFIG_COLLECT_COMPONENT_TIMING
4831 end_timing(cpi, first_partition_search_pass_time);
4832 #endif
4833
4834 #if CONFIG_COLLECT_COMPONENT_TIMING
4835 start_timing(cpi, rd_pick_partition_time);
4836 #endif
4837 BLOCK_SIZE max_sq_size = BLOCK_128X128;
4838 switch (cpi->oxcf.max_partition_size) {
4839 case 4: max_sq_size = BLOCK_4X4; break;
4840 case 8: max_sq_size = BLOCK_8X8; break;
4841 case 16: max_sq_size = BLOCK_16X16; break;
4842 case 32: max_sq_size = BLOCK_32X32; break;
4843 case 64: max_sq_size = BLOCK_64X64; break;
4844 case 128: max_sq_size = BLOCK_128X128; break;
4845 default: assert(0); break;
4846 }
4847 max_sq_size = AOMMIN(max_sq_size, sb_size);
4848
4849 BLOCK_SIZE min_sq_size = BLOCK_4X4;
4850 switch (cpi->oxcf.min_partition_size) {
4851 case 4: min_sq_size = BLOCK_4X4; break;
4852 case 8: min_sq_size = BLOCK_8X8; break;
4853 case 16: min_sq_size = BLOCK_16X16; break;
4854 case 32: min_sq_size = BLOCK_32X32; break;
4855 case 64: min_sq_size = BLOCK_64X64; break;
4856 case 128: min_sq_size = BLOCK_128X128; break;
4857 default: assert(0); break;
4858 }
4859
4860 if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
4861 float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
4862
4863 av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
4864 max_sq_size =
4865 AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size);
4866 }
4867
4868 min_sq_size = AOMMIN(min_sq_size, max_sq_size);
4869
4870 rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4871 max_sq_size, min_sq_size, &dummy_rdc, INT64_MAX,
4872 pc_root, NULL);
4873 #if CONFIG_COLLECT_COMPONENT_TIMING
4874 end_timing(cpi, rd_pick_partition_time);
4875 #endif
4876 }
4877 // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
4878 if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 &&
4879 cm->tile_rows == 1) {
4880 av1_inter_mode_data_fit(tile_data, x->rdmult);
4881 }
4882 if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4883 (tile_info->mi_row_end > (mi_row + mib_size))) {
4884 if (sb_cols_in_tile == 1)
4885 memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
4886 else if (sb_col_in_tile >= 1)
4887 memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
4888 sizeof(*xd->tile_ctx));
4889 }
4890 (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4891 sb_col_in_tile, sb_cols_in_tile);
4892 }
4893 #if CONFIG_COLLECT_COMPONENT_TIMING
4894 end_timing(cpi, encode_sb_time);
4895 #endif
4896 }
4897
4898 static void init_encode_frame_mb_context(AV1_COMP *cpi) {
4899 AV1_COMMON *const cm = &cpi->common;
4900 const int num_planes = av1_num_planes(cm);
4901 MACROBLOCK *const x = &cpi->td.mb;
4902 MACROBLOCKD *const xd = &x->e_mbd;
4903
4904 // Copy data over into macro block data structures.
4905 av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
4906 cm->seq_params.sb_size);
4907
4908 av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
4909 cm->seq_params.subsampling_y, num_planes);
4910 }
4911
4912 static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
4913 if (frame_is_intra_only(&cpi->common)) {
4914 return INTRA_FRAME;
4915 } else if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) ||
4916 cpi->rc.is_src_frame_internal_arf) {
4917 // We will not update the golden frame with an internal overlay frame
4918 return ALTREF_FRAME;
4919 } else if (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame ||
4920 cpi->refresh_alt_ref_frame) {
4921 return GOLDEN_FRAME;
4922 } else {
4923 return LAST_FRAME;
4924 }
4925 }
4926
4927 static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
4928 if (cpi->common.coded_lossless) return ONLY_4X4;
4929 if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
4930 return TX_MODE_LARGEST;
4931 else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
4932 cpi->sf.tx_size_search_method == USE_FAST_RD)
4933 return TX_MODE_SELECT;
4934 else
4935 return cpi->common.tx_mode;
4936 }
4937
4938 void av1_alloc_tile_data(AV1_COMP *cpi) {
4939 AV1_COMMON *const cm = &cpi->common;
4940 const int tile_cols = cm->tile_cols;
4941 const int tile_rows = cm->tile_rows;
4942 int tile_col, tile_row;
4943
4944 if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
4945 CHECK_MEM_ERROR(
4946 cm, cpi->tile_data,
4947 aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
4948 cpi->allocated_tiles = tile_cols * tile_rows;
4949
4950 for (tile_row = 0; tile_row < tile_rows; ++tile_row)
4951 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
4952 TileDataEnc *const tile_data =
4953 &cpi->tile_data[tile_row * tile_cols + tile_col];
4954 int i, j;
4955 for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
4956 for (j = 0; j < MAX_MODES; ++j) {
4957 tile_data->thresh_freq_fact[i][j] = 32;
4958 }
4959 }
4960 }
4961 }
4962
4963 void av1_init_tile_data(AV1_COMP *cpi) {
4964 AV1_COMMON *const cm = &cpi->common;
4965 const int num_planes = av1_num_planes(cm);
4966 const int tile_cols = cm->tile_cols;
4967 const int tile_rows = cm->tile_rows;
4968 int tile_col, tile_row;
4969 TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
4970 TOKENLIST *tplist = cpi->tplist[0][0];
4971 unsigned int tile_tok = 0;
4972 int tplist_count = 0;
4973
4974 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
4975 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
4976 TileDataEnc *const tile_data =
4977 &cpi->tile_data[tile_row * tile_cols + tile_col];
4978 TileInfo *const tile_info = &tile_data->tile_info;
4979 av1_tile_init(tile_info, cm, tile_row, tile_col);
4980
4981 cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
4982 pre_tok = cpi->tile_tok[tile_row][tile_col];
4983 tile_tok = allocated_tokens(
4984 *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
4985 cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
4986 tplist = cpi->tplist[tile_row][tile_col];
4987 tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
4988 tile_data->allow_update_cdf = !cm->large_scale_tile;
4989 tile_data->allow_update_cdf =
4990 tile_data->allow_update_cdf && !cm->disable_cdf_update;
4991 tile_data->tctx = *cm->fc;
4992 }
4993 }
4994 }
4995
4996 void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
4997 int tile_col, int mi_row) {
4998 AV1_COMMON *const cm = &cpi->common;
4999 const int num_planes = av1_num_planes(cm);
5000 const int tile_cols = cm->tile_cols;
5001 TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
5002 const TileInfo *const tile_info = &this_tile->tile_info;
5003 TOKENEXTRA *tok = NULL;
5004 const int sb_row_in_tile =
5005 (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
5006 const int tile_mb_cols =
5007 (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
5008 const int num_mb_rows_in_sb =
5009 ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
5010
5011 get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
5012 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
5013 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
5014
5015 encode_sb_row(cpi, td, this_tile, mi_row, &tok, cpi->sf.use_nonrd_pick_mode);
5016
5017 cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
5018 cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
5019 (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop -
5020 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start);
5021
5022 assert(
5023 (unsigned int)(tok -
5024 cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <=
5025 get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
5026 cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes));
5027
5028 (void)tile_mb_cols;
5029 (void)num_mb_rows_in_sb;
5030 }
5031
5032 void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
5033 int tile_col) {
5034 AV1_COMMON *const cm = &cpi->common;
5035 TileDataEnc *const this_tile =
5036 &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
5037 const TileInfo *const tile_info = &this_tile->tile_info;
5038 int mi_row;
5039
5040 av1_inter_mode_data_init(this_tile);
5041
5042 av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
5043 tile_info->mi_col_end, tile_row);
5044 av1_init_above_context(cm, &td->mb.e_mbd, tile_row);
5045
5046 // Set up pointers to per thread motion search counters.
5047 this_tile->m_search_count = 0; // Count of motion search hits.
5048 this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
5049 td->mb.m_search_count_ptr = &this_tile->m_search_count;
5050 td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
5051
5052 cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
5053
5054 av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator);
5055
5056 for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
5057 mi_row += cm->seq_params.mib_size) {
5058 av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
5059 }
5060 }
5061
5062 static void encode_tiles(AV1_COMP *cpi) {
5063 AV1_COMMON *const cm = &cpi->common;
5064 const int tile_cols = cm->tile_cols;
5065 const int tile_rows = cm->tile_rows;
5066 int tile_col, tile_row;
5067
5068 if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
5069 av1_alloc_tile_data(cpi);
5070
5071 av1_init_tile_data(cpi);
5072
5073 for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
5074 for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
5075 TileDataEnc *const this_tile =
5076 &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
5077 cpi->td.intrabc_used = 0;
5078 cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
5079 cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
5080 av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
5081 cpi->intrabc_used |= cpi->td.intrabc_used;
5082 }
5083 }
5084 }
5085
5086 #define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search
5087 static int gm_get_params_cost(const WarpedMotionParams *gm,
5088 const WarpedMotionParams *ref_gm, int allow_hp) {
5089 int params_cost = 0;
5090 int trans_bits, trans_prec_diff;
5091 switch (gm->wmtype) {
5092 case AFFINE:
5093 case ROTZOOM:
5094 params_cost += aom_count_signed_primitive_refsubexpfin(
5095 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5096 (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
5097 (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5098 params_cost += aom_count_signed_primitive_refsubexpfin(
5099 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5100 (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
5101 (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
5102 if (gm->wmtype >= AFFINE) {
5103 params_cost += aom_count_signed_primitive_refsubexpfin(
5104 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5105 (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
5106 (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
5107 params_cost += aom_count_signed_primitive_refsubexpfin(
5108 GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5109 (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
5110 (1 << GM_ALPHA_PREC_BITS),
5111 (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5112 }
5113 AOM_FALLTHROUGH_INTENDED;
5114 case TRANSLATION:
5115 trans_bits = (gm->wmtype == TRANSLATION)
5116 ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
5117 : GM_ABS_TRANS_BITS;
5118 trans_prec_diff = (gm->wmtype == TRANSLATION)
5119 ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
5120 : GM_TRANS_PREC_DIFF;
5121 params_cost += aom_count_signed_primitive_refsubexpfin(
5122 (1 << trans_bits) + 1, SUBEXPFIN_K,
5123 (ref_gm->wmmat[0] >> trans_prec_diff),
5124 (gm->wmmat[0] >> trans_prec_diff));
5125 params_cost += aom_count_signed_primitive_refsubexpfin(
5126 (1 << trans_bits) + 1, SUBEXPFIN_K,
5127 (ref_gm->wmmat[1] >> trans_prec_diff),
5128 (gm->wmmat[1] >> trans_prec_diff));
5129 AOM_FALLTHROUGH_INTENDED;
5130 case IDENTITY: break;
5131 default: assert(0);
5132 }
5133 return (params_cost << AV1_PROB_COST_SHIFT);
5134 }
5135
5136 static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
5137 int frame) {
5138 (void)num_refs_using_gm;
5139 (void)frame;
5140 switch (sf->gm_search_type) {
5141 case GM_FULL_SEARCH: return 1;
5142 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3:
5143 return !(frame == LAST2_FRAME || frame == LAST3_FRAME);
5144 case GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2:
5145 return !(frame == LAST2_FRAME || frame == LAST3_FRAME ||
5146 (frame == ALTREF2_FRAME));
5147 case GM_DISABLE_SEARCH: return 0;
5148 default: assert(0);
5149 }
5150 return 1;
5151 }
5152
5153 static int get_max_allowed_ref_frames(const AV1_COMP *cpi) {
5154 const unsigned int max_allowed_refs_for_given_speed =
5155 (cpi->sf.selective_ref_frame >= 3) ? INTER_REFS_PER_FRAME - 1
5156 : INTER_REFS_PER_FRAME;
5157 return AOMMIN(max_allowed_refs_for_given_speed,
5158 cpi->oxcf.max_reference_frames);
5159 }
5160
5161 // Enforce the maximum number of references for each frame based on user
5162 // options and speed.
5163 static void enforce_max_ref_frames(AV1_COMP *cpi) {
5164 MV_REFERENCE_FRAME ref_frame;
5165 int total_valid_refs = 0;
5166 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
5167 if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
5168 total_valid_refs++;
5169 }
5170 }
5171
5172 const int max_allowed_refs = get_max_allowed_ref_frames(cpi);
5173
5174 // When more than 'max_allowed_refs' are available, we reduce the number of
5175 // reference frames one at a time based on this order.
5176 const MV_REFERENCE_FRAME disable_order[] = {
5177 LAST3_FRAME,
5178 LAST2_FRAME,
5179 ALTREF2_FRAME,
5180 GOLDEN_FRAME,
5181 };
5182
5183 for (int i = 0; i < 4 && total_valid_refs > max_allowed_refs; ++i) {
5184 const MV_REFERENCE_FRAME ref_frame_to_disable = disable_order[i];
5185
5186 if (!(cpi->ref_frame_flags &
5187 av1_ref_frame_flag_list[ref_frame_to_disable])) {
5188 continue;
5189 }
5190
5191 switch (ref_frame_to_disable) {
5192 case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break;
5193 case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break;
5194 case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break;
5195 case GOLDEN_FRAME: cpi->ref_frame_flags &= ~AOM_GOLD_FLAG; break;
5196 default: assert(0);
5197 }
5198 --total_valid_refs;
5199 }
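  // For example (illustrative): if seven references are valid but only five
  // are allowed, and both the LAST3_FRAME and LAST2_FRAME flags are set, the
  // loop above disables exactly those two and leaves the rest untouched.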
5200 assert(total_valid_refs <= max_allowed_refs);
5201 }
5202
5203 static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) {
5204 assert(!frame_is_intra_only(cm));
5205
5206 int one_sided_refs = 1;
5207 for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
5208 const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
5209 if (buf == NULL) continue;
5210
5211 const int ref_order_hint = buf->order_hint;
5212 if (get_relative_dist(&cm->seq_params.order_hint_info, ref_order_hint,
5213 (int)cm->current_frame.order_hint) > 0) {
5214 one_sided_refs = 0; // bwd reference
5215 break;
5216 }
5217 }
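  // Worked example (illustrative): with current order_hint 10 and references
  // at order hints 6, 8 and 9, every relative distance is negative, so all
  // references lie in the past and the function returns 1; a reference at
  // order hint 12 would give a positive distance (a backward reference) and
  // force the result to 0.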
5218 return one_sided_refs;
5219 }
5220
5221 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
5222 int ref_order_hint[2]) {
5223 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
5224 ref_order_hint[0] = ref_order_hint[1] = 0;
5225 if (!skip_mode_info->skip_mode_allowed) return;
5226
5227 const RefCntBuffer *const buf_0 =
5228 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
5229 const RefCntBuffer *const buf_1 =
5230 get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
5231 assert(buf_0 != NULL && buf_1 != NULL);
5232
5233 ref_order_hint[0] = buf_0->order_hint;
5234 ref_order_hint[1] = buf_1->order_hint;
5235 }
5236
5237 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
5238 AV1_COMMON *const cm = &cpi->common;
5239
5240 av1_setup_skip_mode_allowed(cm);
5241 if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
5242
5243 // Turn off skip mode if the temporal distances of the reference pair to the
5244 // current frame are different by more than 1 frame.
5245 const int cur_offset = (int)cm->current_frame.order_hint;
5246 int ref_offset[2];
5247 get_skip_mode_ref_offsets(cm, ref_offset);
5248 const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
5249 cur_offset, ref_offset[0]);
5250 const int cur_to_ref1 = abs(get_relative_dist(&cm->seq_params.order_hint_info,
5251 cur_offset, ref_offset[1]));
5252 if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
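  // Worked example (illustrative): current order_hint 10 with the skip-mode
  // pair at order hints 9 (cur_to_ref0 = 1) and 11 (|cur_to_ref1| = 1) gives
  // |1 - 1| = 0 <= 1, so skip mode stays enabled; a pair at order hints 7 and
  // 11 gives |3 - 1| = 2 and disables it.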
5253
5254 // High Latency: Turn off skip mode if all refs are fwd.
5255 if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;
5256
5257 static const int flag_list[REF_FRAMES] = { 0,
5258 AOM_LAST_FLAG,
5259 AOM_LAST2_FLAG,
5260 AOM_LAST3_FLAG,
5261 AOM_GOLD_FLAG,
5262 AOM_BWD_FLAG,
5263 AOM_ALT2_FLAG,
5264 AOM_ALT_FLAG };
5265 const int ref_frame[2] = {
5266 cm->current_frame.skip_mode_info.ref_frame_idx_0 + LAST_FRAME,
5267 cm->current_frame.skip_mode_info.ref_frame_idx_1 + LAST_FRAME
5268 };
5269 if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) ||
5270 !(cpi->ref_frame_flags & flag_list[ref_frame[1]]))
5271 return 0;
5272
5273 return 1;
5274 }
5275
5276 // Function to decide if we can skip the global motion parameter computation
5277 // for a particular ref frame
5278 static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
5279 if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
5280 cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
5281 return get_relative_dist(
5282 &cm->seq_params.order_hint_info,
5283 cm->cur_frame->ref_order_hints[ref_frame - LAST_FRAME],
5284 cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <= 0;
5285 }
5286 return 0;
5287 }
5288
5289 static void set_default_interp_skip_flags(AV1_COMP *cpi) {
5290 const int num_planes = av1_num_planes(&cpi->common);
5291 cpi->default_interp_skip_flags = (num_planes == 1)
5292 ? DEFAULT_LUMA_INTERP_SKIP_FLAG
5293 : DEFAULT_INTERP_SKIP_FLAG;
5294 }
5295
5296 static void encode_frame_internal(AV1_COMP *cpi) {
5297 ThreadData *const td = &cpi->td;
5298 MACROBLOCK *const x = &td->mb;
5299 AV1_COMMON *const cm = &cpi->common;
5300 MACROBLOCKD *const xd = &x->e_mbd;
5301 RD_COUNTS *const rdc = &cpi->td.rd_counts;
5302 int i;
5303
5304 x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size);
5305 x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size);
5306 #if CONFIG_DIST_8X8
5307 x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
5308 x->tune_metric = cpi->oxcf.tuning;
5309 #endif
5310 cm->setup_mi(cm);
5311
5312 xd->mi = cm->mi_grid_visible;
5313 xd->mi[0] = cm->mi;
5314
5315 av1_zero(*td->counts);
5316 av1_zero(rdc->comp_pred_diff);
5317 // Two pass partition search can be enabled/disabled for different frames.
5318 // Reset this data at frame level to avoid any incorrect usage.
5319 init_first_partition_pass_stats_tables(cpi, x->first_partition_pass_stats);
5320
5321 // Reset the flag.
5322 cpi->intrabc_used = 0;
5323 // Need to disable intrabc when superres is selected
5324 if (av1_superres_scaled(cm)) {
5325 cm->allow_intrabc = 0;
5326 }
5327
5328 cm->allow_intrabc &= (cpi->oxcf.enable_intrabc);
5329
5330 if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) {
5331 // add to hash table
5332 const int pic_width = cpi->source->y_crop_width;
5333 const int pic_height = cpi->source->y_crop_height;
5334 uint32_t *block_hash_values[2][2];
5335 int8_t *is_block_same[2][3];
5336 int k, j;
5337
5338 for (k = 0; k < 2; k++) {
5339 for (j = 0; j < 2; j++) {
5340 CHECK_MEM_ERROR(cm, block_hash_values[k][j],
5341 aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
5342 }
5343
5344 for (j = 0; j < 3; j++) {
5345 CHECK_MEM_ERROR(cm, is_block_same[k][j],
5346 aom_malloc(sizeof(int8_t) * pic_width * pic_height));
5347 }
5348 }
5349
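    // Build the hash table bottom-up: the 2x2 hashes seed the 4x4 level, and
    // each larger block size (8, 16, 32, 64, 128) is derived from the
    // previous one, ping-ponging between block_hash_values[0] and
    // block_hash_values[1].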
5350 av1_hash_table_create(&cm->cur_frame->hash_table);
5351 av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
5352 is_block_same[0], &cpi->td.mb);
5353 av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
5354 block_hash_values[1], is_block_same[0],
5355 is_block_same[1], &cpi->td.mb);
5356 av1_add_to_hash_map_by_row_with_precal_data(
5357 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5358 pic_width, pic_height, 4);
5359 av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
5360 block_hash_values[0], is_block_same[1],
5361 is_block_same[0], &cpi->td.mb);
5362 av1_add_to_hash_map_by_row_with_precal_data(
5363 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5364 pic_width, pic_height, 8);
5365 av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
5366 block_hash_values[1], is_block_same[0],
5367 is_block_same[1], &cpi->td.mb);
5368 av1_add_to_hash_map_by_row_with_precal_data(
5369 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5370 pic_width, pic_height, 16);
5371 av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
5372 block_hash_values[0], is_block_same[1],
5373 is_block_same[0], &cpi->td.mb);
5374 av1_add_to_hash_map_by_row_with_precal_data(
5375 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5376 pic_width, pic_height, 32);
5377 av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
5378 block_hash_values[1], is_block_same[0],
5379 is_block_same[1], &cpi->td.mb);
5380 av1_add_to_hash_map_by_row_with_precal_data(
5381 &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5382 pic_width, pic_height, 64);
5383
5384 av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
5385 block_hash_values[0], is_block_same[1],
5386 is_block_same[0], &cpi->td.mb);
5387 av1_add_to_hash_map_by_row_with_precal_data(
5388 &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5389 pic_width, pic_height, 128);
5390
5391 for (k = 0; k < 2; k++) {
5392 for (j = 0; j < 2; j++) {
5393 aom_free(block_hash_values[k][j]);
5394 }
5395
5396 for (j = 0; j < 3; j++) {
5397 aom_free(is_block_same[k][j]);
5398 }
5399 }
5400 }
5401
5402 for (i = 0; i < MAX_SEGMENTS; ++i) {
5403 const int qindex = cm->seg.enabled
5404 ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
5405 : cm->base_qindex;
5406 xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
5407 cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
5408 cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
5409 if (xd->lossless[i]) cpi->has_lossless_segment = 1;
5410 xd->qindex[i] = qindex;
5411 if (xd->lossless[i]) {
5412 cpi->optimize_seg_arr[i] = 0;
5413 } else {
5414 cpi->optimize_seg_arr[i] = cpi->sf.optimize_coefficients;
5415 }
5416 }
5417 cm->coded_lossless = is_coded_lossless(cm, xd);
5418 cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
5419
5420 cm->tx_mode = select_tx_mode(cpi);
5421
5422 // Fix delta q resolution for the moment
5423 cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES;
5424 // Set delta_q_present_flag before it is used for the first time
5425 cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
5426 cm->delta_q_info.delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
5427 cm->delta_q_info.delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF;
5428 cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
5429 // update delta_q_present_flag and delta_lf_present_flag based on
5430 // base_qindex
5431 cm->delta_q_info.delta_q_present_flag &= cm->base_qindex > 0;
5432 cm->delta_q_info.delta_lf_present_flag &= cm->base_qindex > 0;
5433
5434 if (cpi->twopass.gf_group.index &&
5435 cpi->twopass.gf_group.index < MAX_LAG_BUFFERS &&
5436 cpi->oxcf.enable_tpl_model) {
5437 TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
5438 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
5439
5440 int tpl_stride = tpl_frame->stride;
5441 int64_t intra_cost_base = 0;
5442 int64_t mc_dep_cost_base = 0;
5443 int row, col;
5444
5445 for (row = 0; row < cm->mi_rows; ++row) {
5446 for (col = 0; col < cm->mi_cols; ++col) {
5447 TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
5448 intra_cost_base += this_stats->intra_cost;
5449 mc_dep_cost_base += this_stats->mc_dep_cost;
5450 }
5451 }
5452
5453 aom_clear_system_state();
5454
5455 if (tpl_frame->is_valid)
5456 cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
5457 }
5458
5459 av1_frame_init_quantizer(cpi);
5460
5461 av1_initialize_rd_consts(cpi);
5462 av1_initialize_me_consts(cpi, x, cm->base_qindex);
5463 init_encode_frame_mb_context(cpi);
5464 set_default_interp_skip_flags(cpi);
5465 if (cm->prev_frame)
5466 cm->last_frame_seg_map = cm->prev_frame->seg_map;
5467 else
5468 cm->last_frame_seg_map = NULL;
5469 if (cm->allow_intrabc || cm->coded_lossless) {
5470 av1_set_default_ref_deltas(cm->lf.ref_deltas);
5471 av1_set_default_mode_deltas(cm->lf.mode_deltas);
5472 } else if (cm->prev_frame) {
5473 memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
5474 memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
5475 }
5476 memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
5477 memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
5478
5479 // Special case: set prev_mi to NULL when the previous mode info
5480 // context cannot be used.
5481 cm->prev_mi = cm->allow_ref_frame_mvs ? cm->prev_mip : NULL;
5482
5483 x->txb_split_count = 0;
5484 #if CONFIG_SPEED_STATS
5485 x->tx_search_count = 0;
5486 #endif // CONFIG_SPEED_STATS
5487
5488 #if CONFIG_COLLECT_COMPONENT_TIMING
5489 start_timing(cpi, av1_compute_global_motion_time);
5490 #endif
5491 av1_zero(rdc->global_motion_used);
5492 av1_zero(cpi->gmparams_cost);
5493 if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
5494 cpi->oxcf.enable_global_motion && !cpi->global_motion_search_done) {
5495 YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
5496 int frame;
5497 double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)];
5498 const double *params_this_motion;
5499 int inliers_by_motion[RANSAC_NUM_MOTIONS];
5500 WarpedMotionParams tmp_wm_params;
5501 // clang-format off
5502 static const double kIdentityParams[MAX_PARAMDIM - 1] = {
5503 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
5504 };
5505 // clang-format on
5506 int num_refs_using_gm = 0;
5507
5508 for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
5509 ref_buf[frame] = NULL;
5510 RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
5511 if (buf != NULL) ref_buf[frame] = &buf->buf;
5512 int pframe;
5513 cm->global_motion[frame] = default_warp_params;
5514 const WarpedMotionParams *ref_params =
5515 cm->prev_frame ? &cm->prev_frame->global_motion[frame]
5516 : &default_warp_params;
5517 // check for duplicate buffer
5518 for (pframe = ALTREF_FRAME; pframe > frame; --pframe) {
5519 if (ref_buf[frame] == ref_buf[pframe]) break;
5520 }
5521 if (pframe > frame) {
5522 memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
5523 sizeof(WarpedMotionParams));
5524 } else if (ref_buf[frame] &&
5525 ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
5526 ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
5527 do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
5528 !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
5529 TransformationType model;
5530 const int64_t ref_frame_error = av1_frame_error(
5531 is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
5532 ref_buf[frame]->y_stride, cpi->source->y_buffer,
5533 cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride);
5534
5535 if (ref_frame_error == 0) continue;
5536
5537 aom_clear_system_state();
5538
5539 // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
5540 const int do_adaptive_gm_estimation = 0;
5541
5542 const int ref_frame_dist = get_relative_dist(
5543 &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
5544 cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
5545 const GlobalMotionEstimationType gm_estimation_type =
5546 cm->seq_params.order_hint_info.enable_order_hint &&
5547 abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
5548 ? GLOBAL_MOTION_DISFLOW_BASED
5549 : GLOBAL_MOTION_FEATURE_BASED;
5550 for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
5551 int64_t best_warp_error = INT64_MAX;
5552 // Initially set all params to identity.
5553 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5554 memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams,
5555 (MAX_PARAMDIM - 1) * sizeof(*params_by_motion));
5556 }
5557
5558 av1_compute_global_motion(model, cpi->source, ref_buf[frame],
5559 cpi->common.seq_params.bit_depth,
5560 gm_estimation_type, inliers_by_motion,
5561 params_by_motion, RANSAC_NUM_MOTIONS);
5562
5563 for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5564 if (inliers_by_motion[i] == 0) continue;
5565
5566 params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i;
5567 av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
5568
5569 if (tmp_wm_params.wmtype != IDENTITY) {
5570 const int64_t warp_error = av1_refine_integerized_param(
5571 &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd),
5572 xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
5573 ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
5574 cpi->source->y_buffer, cpi->source->y_width,
5575 cpi->source->y_height, cpi->source->y_stride, 5,
5576 best_warp_error);
5577 if (warp_error < best_warp_error) {
5578 best_warp_error = warp_error;
5579 // Save the wm_params modified by
5580 // av1_refine_integerized_param() rather than motion index to
5581 // avoid rerunning refine() below.
5582 memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
5583 sizeof(WarpedMotionParams));
5584 }
5585 }
5586 }
5587 if (cm->global_motion[frame].wmtype <= AFFINE)
5588 if (!get_shear_params(&cm->global_motion[frame]))
5589 cm->global_motion[frame] = default_warp_params;
5590
5591 if (cm->global_motion[frame].wmtype == TRANSLATION) {
5592 cm->global_motion[frame].wmmat[0] =
5593 convert_to_trans_prec(cm->allow_high_precision_mv,
5594 cm->global_motion[frame].wmmat[0]) *
5595 GM_TRANS_ONLY_DECODE_FACTOR;
5596 cm->global_motion[frame].wmmat[1] =
5597 convert_to_trans_prec(cm->allow_high_precision_mv,
5598 cm->global_motion[frame].wmmat[1]) *
5599 GM_TRANS_ONLY_DECODE_FACTOR;
5600 }
5601
5602 // If the best error advantage found doesn't meet the threshold for
5603 // this motion type, revert to IDENTITY.
5604 if (!av1_is_enough_erroradvantage(
5605 (double)best_warp_error / ref_frame_error,
5606 gm_get_params_cost(&cm->global_motion[frame], ref_params,
5607 cm->allow_high_precision_mv),
5608 cpi->sf.gm_erroradv_type)) {
5609 cm->global_motion[frame] = default_warp_params;
5610 }
5611 if (cm->global_motion[frame].wmtype != IDENTITY) break;
5612 }
5613 aom_clear_system_state();
5614 }
5615 if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
5616 cpi->gmparams_cost[frame] =
5617 gm_get_params_cost(&cm->global_motion[frame], ref_params,
5618 cm->allow_high_precision_mv) +
5619 cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
5620 cpi->gmtype_cost[IDENTITY];
5621 }
5622 // clear disabled ref_frames
5623 for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
5624 const int ref_disabled =
5625 !(cpi->ref_frame_flags & av1_ref_frame_flag_list[frame]);
5626 if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) {
5627 cpi->gmparams_cost[frame] = 0;
5628 cm->global_motion[frame] = default_warp_params;
5629 }
5630 }
5631 cpi->global_motion_search_done = 1;
5632 }
5633 memcpy(cm->cur_frame->global_motion, cm->global_motion,
5634 REF_FRAMES * sizeof(WarpedMotionParams));
5635 #if CONFIG_COLLECT_COMPONENT_TIMING
5636 end_timing(cpi, av1_compute_global_motion_time);
5637 #endif
5638
5639 #if CONFIG_COLLECT_COMPONENT_TIMING
5640 start_timing(cpi, av1_setup_motion_field_time);
5641 #endif
5642 av1_setup_motion_field(cm);
5643 #if CONFIG_COLLECT_COMPONENT_TIMING
5644 end_timing(cpi, av1_setup_motion_field_time);
5645 #endif
5646
5647 cpi->all_one_sided_refs =
5648 frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
5649
5650 cm->current_frame.skip_mode_info.skip_mode_flag =
5651 check_skip_mode_enabled(cpi);
5652
5653 {
5654 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy;
5655 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy;
5656 cpi->row_mt = 0;
5657 if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
5658 cpi->row_mt = 1;
5659 cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
5660 cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
5661 av1_encode_tiles_row_mt(cpi);
5662 } else {
5663 if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
5664 av1_encode_tiles_mt(cpi);
5665 else
5666 encode_tiles(cpi);
5667 }
5668 }
5669
5670 // If intrabc is allowed but never selected, reset the allow_intrabc flag.
5671 if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0;
5672 if (cm->allow_intrabc) cm->delta_q_info.delta_lf_present_flag = 0;
5673 }
5674
5675 void av1_encode_frame(AV1_COMP *cpi) {
5676 AV1_COMMON *const cm = &cpi->common;
5677 CurrentFrame *const current_frame = &cm->current_frame;
5678 const int num_planes = av1_num_planes(cm);
  // Indicates whether or not to use a default reduced set for ext-tx
  // rather than the potential full set of 16 transforms.
  cm->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;

  // Make sure segment_id is no larger than last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int mi_rows = cm->mi_rows;
    const int mi_cols = cm->mi_cols;
    const int last_active_segid = cm->seg.last_active_segid;
    uint8_t *map = cpi->segmentation_map;
    for (int mi_row = 0; mi_row < mi_rows; ++mi_row) {
      for (int mi_col = 0; mi_col < mi_cols; ++mi_col) {
        map[mi_col] = AOMMIN(map[mi_col], last_active_segid);
      }
      map += mi_cols;
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi);
  av1_setup_frame_sign_bias(cm);

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(num_planes);
#else
  (void)num_planes;
#endif

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It performs the same analysis for transform size selection.
    //
    // TODO(zoeliu): To investigate whether a frame_type other than
    // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    // NOTE: "is_alt_ref" is true only for OVERLAY/INTNL_OVERLAY frames
    if (is_alt_ref || frame_is_intra_only(cm))
      current_frame->reference_mode = SINGLE_REFERENCE;
    else
      current_frame->reference_mode = REFERENCE_MODE_SELECT;

    cm->interp_filter = SWITCHABLE;
    if (cm->large_scale_tile) cm->interp_filter = EIGHTTAP_REGULAR;

    cm->switchable_motion_mode = 1;

    rdc->compound_ref_used_flag = 0;
    rdc->skip_mode_used_flag = 0;

    encode_frame_internal(cpi);

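    // Refresh the per-frame-type threshold for each reference mode as a
    // running average: fold this frame's per-macroblock rd-cost difference
    // into the previous estimate with weight 1/2.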
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
      // compound_ref_used_flag is accumulated over all blocks, 4x4 included;
      // if no block ended up using compound prediction, fall back to
      // single-reference mode for the frame.
      if (rdc->compound_ref_used_flag == 0) {
        current_frame->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
        av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
      }
    }
    // Re-check on the skip mode status as reference mode may have been
    // changed.
    SkipModeInfo *const skip_mode_info = &current_frame->skip_mode_info;
    if (frame_is_intra_only(cm) ||
        current_frame->reference_mode == SINGLE_REFERENCE) {
      skip_mode_info->skip_mode_allowed = 0;
      skip_mode_info->skip_mode_flag = 0;
    }
    if (skip_mode_info->skip_mode_flag && rdc->skip_mode_used_flag == 0)
      skip_mode_info->skip_mode_flag = 0;

    if (!cm->large_scale_tile) {
      if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
        cm->tx_mode = TX_MODE_LARGEST;
    }
  } else {
    encode_frame_internal(cpi);
  }
}

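// Recursively walks an inter block's transform partition tree from tx_size
// down to the leaf sizes recorded in mbmi->inter_tx_size[]. Each internal
// node contributes a "split" decision and each leaf a "no split" decision to
// the frame counts and, when permitted, to the entropy-coding CDFs, mirroring
// what the bitstream writer will later signal.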
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
                              int blk_row, int blk_col,
                              uint8_t allow_update_cdf) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
                                   xd->left_txfm_context + blk_row,
                                   mbmi->sb_type, tx_size);
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
  assert(tx_size > TX_4X4);

  if (depth == MAX_VARTX_DEPTH) {
    // The partition depth limit has been reached, so this tx_size is implied
    // and nothing needs to be added to the counts.
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
    return;
  }

  if (tx_size == plane_tx_size) {
#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][0];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
  } else {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];

#if CONFIG_ENTROPY_STATS
    ++counts->txfm_partition[ctx][1];
#endif
    if (allow_update_cdf)
      update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
    ++x->txb_split_count;

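    // 4x4 is the smallest transform, so a split to TX_4X4 ends the recursion:
    // record the leaf size and update the partition contexts directly.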
    if (sub_txs == TX_4X4) {
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
      mbmi->tx_size = TX_4X4;
      txfm_partition_update(xd->above_txfm_context + blk_col,
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
      return;
    }

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + row,
                          blk_col + col, allow_update_cdf);
      }
    }
  }
}

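// Applies update_txfm_count() to every max-tx-size unit of the block, after
// pointing the above/left transform partition contexts at the block's
// position in the tile.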
static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
                                      BLOCK_SIZE plane_bsize, int mi_row,
                                      int mi_col, FRAME_COUNTS *td_counts,
                                      uint8_t allow_update_cdf) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int bh = tx_size_high_unit[max_tx_size];
  const int bw = tx_size_wide_unit[max_tx_size];
  int idx, idy;

  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  for (idy = 0; idy < mi_height; idy += bh)
    for (idx = 0; idx < mi_width; idx += bw)
      update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx,
                        allow_update_cdf);
}

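// Dry-run counterpart of update_txfm_count(): walks the same transform
// partition tree and propagates the chosen sizes into mbmi and the above/left
// partition contexts, but leaves counts and CDFs untouched.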
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
                             int blk_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  if (tx_size == plane_tx_size) {
    mbmi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
  } else {
    if (tx_size == TX_8X8) {
      mbmi->inter_tx_size[txb_size_index] = TX_4X4;
      mbmi->tx_size = TX_4X4;
      txfm_partition_update(xd->above_txfm_context + blk_col,
                            xd->left_txfm_context + blk_row, TX_4X4, tx_size);
      return;
    }
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        set_txfm_context(xd, sub_txs, offsetr, offsetc);
      }
    }
  }
}

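// Applies set_txfm_context() to every max-tx-size unit of the block; used on
// dry runs where only the coding contexts, not the statistics, must be kept
// in sync.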
static void tx_partition_set_contexts(const AV1_COMMON *const cm,
                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
                                      int mi_row, int mi_col) {
  const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
  const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int bh = tx_size_high_unit[max_tx_size];
  const int bw = tx_size_wide_unit[max_tx_size];
  int idx, idy;

  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  for (idy = 0; idy < mi_height; idy += bh)
    for (idx = 0; idx < mi_width; idx += bw)
      set_txfm_context(xd, max_tx_size, idy, idx);
}

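// Encodes a block whose mode decisions have already been made: builds the
// intra or inter predictor, transforms and tokenizes the residue and, unless
// this is a dry run, updates the transform-size statistics and the entropy
// coding contexts accordingly.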
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO **mi_4x4 = xd->mi;
  MB_MODE_INFO *mbmi = mi_4x4[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
  const int mis = cm->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];
  const int is_inter = is_inter_block(mbmi);

  if (cpi->two_pass_partition_search && x->cb_partition_scan) {
    for (int row = mi_row; row < mi_row + mi_height;
         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
      for (int col = mi_col; col < mi_col + mi_width;
           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
        const int index = av1_first_partition_pass_stats_index(row, col);
        FIRST_PARTITION_PASS_STATS *const stats =
            &x->first_partition_pass_stats[index];
        // Increase the counter of data samples.
        ++stats->sample_counts;
        // Increase the counter for ref_frame[0] and ref_frame[1].
        if (stats->ref0_counts[mbmi->ref_frame[0]] < 255)
          ++stats->ref0_counts[mbmi->ref_frame[0]];
        if (mbmi->ref_frame[1] >= 0 &&
            stats->ref1_counts[mbmi->ref_frame[1]] < 255)
          ++stats->ref1_counts[mbmi->ref_frame[1]];
        if (cpi->sf.use_first_partition_pass_interintra_stats) {
          // Increase the counter for interintra_motion_mode_count.
          if (mbmi->motion_mode == 0 && mbmi->ref_frame[1] == INTRA_FRAME &&
              stats->interintra_motion_mode_count[mbmi->ref_frame[0]] < 255) {
            ++stats->interintra_motion_mode_count[mbmi->ref_frame[0]];
          }
        }
      }
    }
  }

  if (!is_inter) {
    xd->cfl.is_chroma_reference =
        is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
                            cm->seq_params.subsampling_y);
    xd->cfl.store_y = store_cfl_required(cm, xd);
    mbmi->skip = 1;
    for (int plane = 0; plane < num_planes; ++plane) {
      av1_encode_intra_block_plane(cpi, x, bsize, plane,
                                   cpi->optimize_seg_arr[mbmi->segment_id],
                                   mi_row, mi_col);
    }

    // If there is at least one lossless segment, force skip to 0 for intra
    // blocks, so that the segment_id is not changed by write_segment_id().
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
        cpi->has_lossless_segment)
      mbmi->skip = 0;

    xd->cfl.store_y = 0;
    if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
          if (!dry_run) {
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
                                   PALETTE_MAP, tile_data->allow_update_cdf,
                                   td->counts);
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
            *rate +=
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
          }
        }
      }
    }

    av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col,
                           tile_data->allow_update_cdf);
  } else {
    int ref;
    const int is_compound = has_second_ref(mbmi);

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      const YV12_BUFFER_CONFIG *cfg =
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           xd->block_ref_scale_factors[ref], num_planes);
    }

    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      assert(cpi->oxcf.enable_obmc == 1);
      av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
    }

#if CONFIG_MISMATCH_DEBUG
    if (dry_run == OUTPUT_ENABLED) {
      for (int plane = 0; plane < num_planes; ++plane) {
        const struct macroblockd_plane *pd = &xd->plane[plane];
        int pixel_c, pixel_r;
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
                        pd->subsampling_x, pd->subsampling_y);
        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
                                 pd->subsampling_y))
          continue;
        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
                                  cm->current_frame.order_hint, plane, pixel_c,
                                  pixel_r, pd->width, pd->height,
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
      }
    }
#else
    (void)num_planes;
#endif

    av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
    av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
                          tile_data->allow_update_cdf);
  }

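  // The statistics updates below run only on the real encode pass; dry runs
  // from the partition search must leave the frame counts untouched.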
  if (!dry_run) {
    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] &&
        mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) {
      if (is_inter) {
        tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts,
                                  tile_data->allow_update_cdf);
      } else {
        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
          ++x->txb_split_count;
        if (block_signals_txsize(bsize)) {
          const int tx_size_ctx = get_tx_size_context(xd);
          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
          const int max_depths = bsize_to_max_depth(bsize);

          if (tile_data->allow_update_cdf)
            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
                       depth, max_depths + 1);
#if CONFIG_ENTROPY_STATS
          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
        }
      }
      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
    } else {
      int i, j;
      TX_SIZE intra_tx_size;
      // The transform size is not signaled in this case: intra blocks keep
      // the size already selected, while inter blocks derive it from the
      // frame's tx_mode (TX_4X4 for lossless segments).
      if (is_inter) {
        if (xd->lossless[mbmi->segment_id]) {
          intra_tx_size = TX_4X4;
        } else {
          intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
        }
      } else {
        intra_tx_size = mbmi->tx_size;
      }

      for (j = 0; j < mi_height; j++)
        for (i = 0; i < mi_width; i++)
          if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
            mi_4x4[mis * j + i]->tx_size = intra_tx_size;

      if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
    }
  }

  if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) &&
      is_inter && !(mbmi->skip || seg_skip) &&
      !xd->lossless[mbmi->segment_id]) {
    if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
  } else {
    TX_SIZE tx_size = mbmi->tx_size;
    // The transform size is not signaled in this case: intra blocks keep the
    // size already selected, while inter blocks derive it from the frame's
    // tx_mode (TX_4X4 for lossless segments).
    if (is_inter) {
      if (xd->lossless[mbmi->segment_id]) {
        tx_size = TX_4X4;
      } else {
        tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
      }
    } else {
      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
    }
    mbmi->tx_size = tx_size;
    set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h,
                  (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
  }
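  // Inter blocks that carry no chroma of their own (because of chroma
  // subsampling) still contribute luma pixels to the CfL buffer, so that a
  // later intra chroma block covering this area can predict from them.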
  CFL_CTX *const cfl = &xd->cfl;
  if (is_inter_block(mbmi) &&
      !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
                           cfl->subsampling_y) &&
      is_cfl_allowed(xd)) {
    cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
  }
}
