1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #ifndef AOM_AV1_ENCODER_BLOCK_H_
13 #define AOM_AV1_ENCODER_BLOCK_H_
14
15 #include "av1/common/entropymv.h"
16 #include "av1/common/entropy.h"
17 #include "av1/common/mvref_common.h"
18 #include "av1/encoder/hash.h"
19 #if CONFIG_DIST_8X8
20 #include "aom/aomcx.h"
21 #endif
22
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26
// Difference statistics for a block.
// NOTE(review): the field names suggest sum of squared error, signed sum and
// variance; confirm against the code that fills this struct.
typedef struct {
  unsigned int sse;
  int sum;
  unsigned int var;
} DIFF;
32
33 typedef struct macroblock_plane {
34 DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
35 tran_low_t *qcoeff;
36 tran_low_t *coeff;
37 uint16_t *eobs;
38 uint8_t *txb_entropy_ctx;
39 struct buf_2d src;
40
41 // Quantizer setings
42 // These are used/accessed only in the quantization process
43 // RDO does not / must not depend on any of these values
44 // All values below share the coefficient scale/shift used in TX
45 const int16_t *quant_fp_QTX;
46 const int16_t *round_fp_QTX;
47 const int16_t *quant_QTX;
48 const int16_t *quant_shift_QTX;
49 const int16_t *zbin_QTX;
50 const int16_t *round_QTX;
51 const int16_t *dequant_QTX;
52 } MACROBLOCK_PLANE;
53
54 typedef struct {
55 int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
56 int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
57 int base_cost[SIG_COEF_CONTEXTS][8];
58 int eob_extra_cost[EOB_COEF_CONTEXTS][2];
59 int dc_sign_cost[DC_SIGN_CONTEXTS][2];
60 int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
61 } LV_MAP_COEFF_COST;
62
// Cost table for end-of-block signalling: 2 contexts x 11 symbols.
// NOTE(review): the meaning of the two dimensions is inferred from the name
// and shape only; confirm against the code that fills this table.
typedef struct {
  int eob_cost[2][11];
} LV_MAP_EOB_COST;
66
67 typedef struct {
68 tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
69 uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
70 uint8_t txb_skip_ctx[MAX_MB_PLANE]
71 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
72 int dc_sign_ctx[MAX_MB_PLANE]
73 [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
74 } CB_COEFF_BUFFER;
75
76 typedef struct {
77 // TODO(angiebird): Reduce the buffer size according to sb_type
78 CB_COEFF_BUFFER *cb_coef_buff;
79 CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
80 int_mv global_mvs[REF_FRAMES];
81 int cb_offset;
82 int16_t mode_context[MODE_CTX_REF_FRAMES];
83 uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
84 } MB_MODE_INFO_EXT;
85
// Bounds on motion vector components, as column (horizontal) and row
// (vertical) ranges.
// NOTE(review): whether the bounds are inclusive, and their units, are not
// visible here; confirm against the motion search code.
typedef struct {
  int col_min;
  int col_max;
  int row_min;
  int row_max;
} MvLimits;
92
93 typedef struct {
94 uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
95 int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
96 } PALETTE_BUFFER;
97
98 typedef struct {
99 TX_SIZE tx_size;
100 TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
101 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
102 TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
103 RD_STATS rd_stats;
104 uint32_t hash_value;
105 } MB_RD_INFO;
106
107 #define RD_RECORD_BUFFER_LEN 8
108 typedef struct {
109 MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer.
110 int index_start;
111 int num;
112 CRC32C crc_calculator; // Hash function.
113 } MB_RD_RECORD;
114
115 typedef struct {
116 int64_t dist;
117 int64_t sse;
118 int rate;
119 uint16_t eob;
120 TX_TYPE tx_type;
121 uint16_t entropy_context;
122 uint8_t txb_entropy_ctx;
123 uint8_t valid;
124 uint8_t fast; // This is not being used now.
125 } TXB_RD_INFO;
126
127 #define TX_SIZE_RD_RECORD_BUFFER_LEN 256
128 typedef struct {
129 uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
130 TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
131 int index_start;
132 int num;
133 } TXB_RD_RECORD;
134
135 typedef struct tx_size_rd_info_node {
136 TXB_RD_INFO *rd_info_array; // Points to array of size TX_TYPES.
137 struct tx_size_rd_info_node *children[4];
138 } TXB_RD_INFO_NODE;
139
140 // Simple translation rd state for prune_comp_search_by_single_result
141 typedef struct {
142 RD_STATS rd_stats;
143 RD_STATS rd_stats_y;
144 RD_STATS rd_stats_uv;
145 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
146 uint8_t skip;
147 uint8_t disable_skip;
148 uint8_t early_skipped;
149 } SimpleRDState;
150
// 4: NEAREST, NEW, NEAR, GLOBAL
#define SINGLE_REF_MODES ((REF_FRAMES - 1) * 4)

// Region size for mode decision sampling in the first pass of partition
// search(two_pass_partition_search speed feature), in units of mi size(4).
// Used by the mode pruning in two_pass_partition_search feature.
#define FIRST_PARTITION_PASS_SAMPLE_REGION 8
#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3
// Number of sample regions in a superblock (regions per row times regions
// per column). The whole product is parenthesized so that the macro can be
// used safely inside a larger expression.
#define FIRST_PARTITION_PASS_STATS_TABLES                     \
  ((MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \
   (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2))
// log2 of the number of sample regions per superblock row. It is used as the
// row shift when computing a stats table index.
#define FIRST_PARTITION_PASS_STATS_STRIDE \
  (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
164
av1_first_partition_pass_stats_index(int mi_row,int mi_col)165 static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) {
166 const int row =
167 (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
168 const int col =
169 (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
170 return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col;
171 }
172
173 typedef struct {
174 uint8_t ref0_counts[REF_FRAMES]; // Counters for ref_frame[0].
175 uint8_t ref1_counts[REF_FRAMES]; // Counters for ref_frame[1].
176 int sample_counts; // Number of samples collected.
177 uint8_t interintra_motion_mode_count[REF_FRAMES]; // Counter for interintra
178 // motion mode
179 } FIRST_PARTITION_PASS_STATS;
180
181 #define MAX_INTERP_FILTER_STATS 64
182 typedef struct {
183 InterpFilters filters;
184 int_mv mv[2];
185 int8_t ref_frames[2];
186 COMPOUND_TYPE comp_type;
187 int64_t rd;
188 int skip_txfm_sb;
189 int64_t skip_sse_sb;
190 unsigned int pred_sse;
191 } INTERPOLATION_FILTER_STATS;
192
193 #define MAX_COMP_RD_STATS 64
194 typedef struct {
195 int32_t rate[COMPOUND_TYPES];
196 int64_t dist[COMPOUND_TYPES];
197 int64_t comp_model_rd[COMPOUND_TYPES];
198 int_mv mv[2];
199 MV_REFERENCE_FRAME ref_frames[2];
200 PREDICTION_MODE mode;
201 InterpFilters filter;
202 int ref_mv_idx;
203 int is_global[2];
204 } COMP_RD_STATS;
205
206 struct inter_modes_info;
207 typedef struct macroblock MACROBLOCK;
208 struct macroblock {
209 struct macroblock_plane plane[MAX_MB_PLANE];
210
211 // Determine if one would go with reduced complexity transform block
212 // search model to select prediction modes, or full complexity model
213 // to select transform kernel.
214 int rd_model;
215
216 // Indicate if the encoder is running in the first pass partition search.
217 // In that case, apply certain speed features therein to reduce the overhead
218 // cost in the first pass search.
219 int cb_partition_scan;
220
221 FIRST_PARTITION_PASS_STATS
222 first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES];
223
224 // [comp_idx][saved stat_idx]
225 INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS];
226 int interp_filter_stats_idx[2];
227
228 // prune_comp_search_by_single_result (3:MAX_REF_MV_SERCH)
229 SimpleRDState simple_rd_state[SINGLE_REF_MODES][3];
230
231 // Activate constrained coding block partition search range.
232 int use_cb_search_range;
233
234 // Inter macroblock RD search info.
235 MB_RD_RECORD mb_rd_record;
236
237 // Inter transform block RD search info. for square TX sizes.
238 TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)];
239 TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)];
240 TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)];
241 TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)];
242
243 // Intra transform block RD search info. for square TX sizes.
244 TXB_RD_RECORD txb_rd_record_intra;
245
246 MACROBLOCKD e_mbd;
247 MB_MODE_INFO_EXT *mbmi_ext;
248 int skip_block;
249 int qindex;
250
251 // The equivalent error at the current rdmult of one whole bit (not one
252 // bitcost unit).
253 int errorperbit;
254 // The equivalend SAD error of one (whole) bit at the current quantizer
255 // for large blocks.
256 int sadperbit16;
257 // The equivalend SAD error of one (whole) bit at the current quantizer
258 // for sub-8x8 blocks.
259 int sadperbit4;
260 int rdmult;
261 int cb_rdmult;
262 int mb_energy;
263 int sb_energy_level;
264 int *m_search_count_ptr;
265 int *ex_search_count_ptr;
266
267 unsigned int txb_split_count;
268 #if CONFIG_SPEED_STATS
269 unsigned int tx_search_count;
270 #endif // CONFIG_SPEED_STATS
271
272 // These are set to their default values at the beginning, and then adjusted
273 // further in the encoding process.
274 BLOCK_SIZE min_partition_size;
275 BLOCK_SIZE max_partition_size;
276
277 unsigned int max_mv_context[REF_FRAMES];
278 unsigned int source_variance;
279 unsigned int simple_motion_pred_sse;
280 unsigned int pred_sse[REF_FRAMES];
281 int pred_mv_sad[REF_FRAMES];
282
283 int nmv_vec_cost[MV_JOINTS];
284 int *nmvcost[2];
285 int *nmvcost_hp[2];
286 int **mv_cost_stack;
287
288 int32_t *wsrc_buf;
289 int32_t *mask_buf;
290 uint8_t *above_pred_buf;
291 uint8_t *left_pred_buf;
292
293 PALETTE_BUFFER *palette_buffer;
294
295 CONV_BUF_TYPE *tmp_conv_dst;
296 uint8_t *tmp_obmc_bufs[2];
297
298 FRAME_CONTEXT *row_ctx;
299 // This context will be used to update color_map_cdf pointer which would be
300 // used during pack bitstream. For single thread and tile-multithreading case
301 // this ponter will be same as xd->tile_ctx, but for the case of row-mt:
302 // xd->tile_ctx will point to a temporary context while tile_pb_ctx will point
303 // to the accurate tile context.
304 FRAME_CONTEXT *tile_pb_ctx;
305
306 struct inter_modes_info *inter_modes_info;
307
308 // buffer for hash value calculation of a block
309 // used only in av1_get_block_hash_value()
310 // [first hash/second hash]
311 // [two buffers used ping-pong]
312 uint32_t *hash_value_buffer[2][2];
313
314 CRC_CALCULATOR crc_calculator1;
315 CRC_CALCULATOR crc_calculator2;
316 int g_crc_initialized;
317
318 // These define limits to motion vector components to prevent them
319 // from extending outside the UMV borders
320 MvLimits mv_limits;
321
322 uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
323
324 int skip;
325 int skip_chroma_rd;
326 int skip_cost[SKIP_CONTEXTS][2];
327
328 int skip_mode; // 0: off; 1: on
329 int skip_mode_cost[SKIP_CONTEXTS][2];
330
331 int compound_idx;
332
333 LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
334 LV_MAP_EOB_COST eob_costs[7][2];
335 uint16_t cb_offset;
336
337 // mode costs
338 int intra_inter_cost[INTRA_INTER_CONTEXTS][2];
339
340 int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
341 int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
342 int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2];
343 int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
344 int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
345
346 int comp_inter_cost[COMP_INTER_CONTEXTS][2];
347 int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2];
348 int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS]
349 [CDF_SIZE(COMP_REFERENCE_TYPES)];
350 int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
351 [CDF_SIZE(2)];
352 // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or
353 // GOLDEN_FRAME) in bidir-comp mode.
354 int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2];
355 // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or
356 // BWDREF_FRAME) in bidir-comp mode.
357 int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2];
358 int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
359 int compound_type_cost[BLOCK_SIZES_ALL][MASKED_COMPOUND_TYPES];
360 int wedge_idx_cost[BLOCK_SIZES_ALL][16];
361 int interintra_cost[BLOCK_SIZE_GROUPS][2];
362 int wedge_interintra_cost[BLOCK_SIZES_ALL][2];
363 int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
364 int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
365 int motion_mode_cost1[BLOCK_SIZES_ALL][2];
366 int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
367 int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
368 int filter_intra_cost[BLOCK_SIZES_ALL][2];
369 int filter_intra_mode_cost[FILTER_INTRA_MODES];
370 int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
371 int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
372 int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
373 int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
374 int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
375 [PALETTE_COLORS];
376 int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
377 [PALETTE_COLORS];
378 int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
379 int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2];
380 // The rate associated with each alpha codeword
381 int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
382 int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
383 int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2];
384 int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
385 int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
386 [TX_TYPES];
387 int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
388 int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
389 int wiener_restore_cost[2];
390 int sgrproj_restore_cost[2];
391 int intrabc_cost[2];
392
393 // Used to store sub partition's choices.
394 MV pred_mv[REF_FRAMES];
395
396 // Store the best motion vector during motion search
397 int_mv best_mv;
398 // Store the second best motion vector during full-pixel motion search
399 int_mv second_best_mv;
400
401 // Store the fractional best motion vector during sub/Qpel-pixel motion search
402 int_mv fractional_best_mv[3];
403
404 // Ref frames that are selected by square partition blocks within a super-
405 // block, in MI resolution. They can be used to prune ref frames for
406 // rectangular blocks.
407 int picked_ref_frames_mask[32 * 32];
408
409 // use default transform and skip transform type search for intra modes
410 int use_default_intra_tx_type;
411 // use default transform and skip transform type search for inter modes
412 int use_default_inter_tx_type;
413 #if CONFIG_DIST_8X8
414 int using_dist_8x8;
415 aom_tune_metric tune_metric;
416 #endif // CONFIG_DIST_8X8
417 int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
418 int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
419 // Bit flags for pruning tx type search, tx split, etc.
420 int tx_search_prune[EXT_TX_SET_TYPES];
421 int must_find_valid_partition;
422 int tx_split_prune_flag; // Flag to skip tx split RD search.
423 int recalc_luma_mc_data; // Flag to indicate recalculation of MC data during
424 // interpolation filter search
425 // The likelihood of an edge existing in the block (using partial Canny edge
426 // detection). For reference, 556 is the value returned for a solid
427 // vertical black/white edge.
428 uint16_t edge_strength;
429 // The strongest edge strength seen along the x/y axis.
430 uint16_t edge_strength_x;
431 uint16_t edge_strength_y;
432
433 // [Saved stat index]
434 COMP_RD_STATS comp_rd_stats[MAX_COMP_RD_STATS];
435 int comp_rd_stats_idx;
436 };
437
is_rect_tx_allowed_bsize(BLOCK_SIZE bsize)438 static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
439 static const char LUT[BLOCK_SIZES_ALL] = {
440 0, // BLOCK_4X4
441 1, // BLOCK_4X8
442 1, // BLOCK_8X4
443 0, // BLOCK_8X8
444 1, // BLOCK_8X16
445 1, // BLOCK_16X8
446 0, // BLOCK_16X16
447 1, // BLOCK_16X32
448 1, // BLOCK_32X16
449 0, // BLOCK_32X32
450 1, // BLOCK_32X64
451 1, // BLOCK_64X32
452 0, // BLOCK_64X64
453 0, // BLOCK_64X128
454 0, // BLOCK_128X64
455 0, // BLOCK_128X128
456 1, // BLOCK_4X16
457 1, // BLOCK_16X4
458 1, // BLOCK_8X32
459 1, // BLOCK_32X8
460 1, // BLOCK_16X64
461 1, // BLOCK_64X16
462 };
463
464 return LUT[bsize];
465 }
466
is_rect_tx_allowed(const MACROBLOCKD * xd,const MB_MODE_INFO * mbmi)467 static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
468 const MB_MODE_INFO *mbmi) {
469 return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
470 !xd->lossless[mbmi->segment_id];
471 }
472
tx_size_to_depth(TX_SIZE tx_size,BLOCK_SIZE bsize)473 static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) {
474 TX_SIZE ctx_size = max_txsize_rect_lookup[bsize];
475 int depth = 0;
476 while (tx_size != ctx_size) {
477 depth++;
478 ctx_size = sub_tx_size_map[ctx_size];
479 assert(depth <= MAX_TX_DEPTH);
480 }
481 return depth;
482 }
483
set_blk_skip(MACROBLOCK * x,int plane,int blk_idx,int skip)484 static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx,
485 int skip) {
486 if (skip)
487 x->blk_skip[blk_idx] |= 1UL << plane;
488 else
489 x->blk_skip[blk_idx] &= ~(1UL << plane);
490 #ifndef NDEBUG
491 // Set chroma planes to uninitialized states when luma is set to check if
492 // it will be set later
493 if (plane == 0) {
494 x->blk_skip[blk_idx] |= 1UL << (1 + 4);
495 x->blk_skip[blk_idx] |= 1UL << (2 + 4);
496 }
497
498 // Clear the initialization checking bit
499 x->blk_skip[blk_idx] &= ~(1UL << (plane + 4));
500 #endif
501 }
502
is_blk_skip(MACROBLOCK * x,int plane,int blk_idx)503 static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) {
504 #ifndef NDEBUG
505 // Check if this is initialized
506 assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4))));
507
508 // The magic number is 0x77, this is to test if there is garbage data
509 assert((x->blk_skip[blk_idx] & 0x88) == 0);
510 #endif
511 return (x->blk_skip[blk_idx] >> plane) & 1;
512 }
513
514 #ifdef __cplusplus
515 } // extern "C"
516 #endif
517
518 #endif // AOM_AV1_ENCODER_BLOCK_H_
519