/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
#define AOM_AV1_ENCODER_NONRD_OPT_H_

#include "av1/encoder/context_tree.h"
#include "av1/encoder/rdopt_utils.h"
#include "av1/encoder/rdopt.h"

#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
#define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3)
#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_INTER_MODES 12
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif
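
// Illustrative notes on the macros above (a sketch, not part of the API):
// CALC_BIASED_RDCOST() scales an RD cost down to 7/8 of its value, e.g.
// CALC_BIASED_RDCOST(800) == 700. CAP_TX_SIZE_FOR_BSIZE_GT32() and
// TX_SIZE_FOR_BSIZE_GT32 are typically paired to cap the transform size of
// blocks larger than 32x32, roughly as follows (variable names hypothetical):
//   if (CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize))
//     tx_size = TX_SIZE_FOR_BSIZE_GT32;  // i.e. TX_16X16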

extern int g_pick_inter_mode_cnt;
/*!\cond */
typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

typedef struct {
  PRED_BUFFER *best_pred;
  PREDICTION_MODE best_mode;
  TX_SIZE best_tx_size;
  TX_TYPE tx_type;
  MV_REFERENCE_FRAME best_ref_frame;
  MV_REFERENCE_FRAME best_second_ref_frame;
  uint8_t best_mode_skip_txfm;
  uint8_t best_mode_initial_skip_flag;
  int_interpfilters best_pred_filter;
  MOTION_MODE best_motion_mode;
  WarpedMotionParams wm_params;
  int num_proj_ref;
  PALETTE_MODE_INFO pmi;
  int64_t best_sse;
} BEST_PICKMODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
  PREDICTION_MODE pred_mode;
} COMP_REF_MODE;

struct estimate_block_intra_args {
  AV1_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int skippable;
  RD_STATS *rdc;
  unsigned int best_sad;
  bool prune_mode_based_on_sad;
  bool prune_palette_sad;
};
/*!\endcond */

/*!\brief Structure to store parameters and statistics used in non-rd inter mode
 * evaluation.
 */
typedef struct {
  //! Structure to hold best inter mode data
  BEST_PICKMODE best_pickmode;
  //! RD cost of the currently evaluated mode
  RD_STATS this_rdc;
  //! RD cost of the best mode found so far
  RD_STATS best_rdc;
  //! Distortion of chroma planes for all modes and reference frames
  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
  //! Buffer to hold predicted block for all reference frames and planes
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  //! Array to hold variance of all modes and reference frames
  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
  //! Array to hold ref cost of single reference mode for all ref frames
  unsigned int ref_costs_single[REF_FRAMES];
  //! Array to hold motion vector for all modes and reference frames
  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold best mv for all modes and reference frames
  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold inter mode cost of single ref mode for all ref frames
  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
  //! Per-reference-frame flags indicating whether the reference may be used
  int use_ref_frame_mask[REF_FRAMES];
  //! Array of flags marking evaluated modes for each reference frame
  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
  //! Array of flags indicating whether a scaled reference frame is used
  bool use_scaled_ref_frame[REF_FRAMES];
} InterModeSearchStateNonrd;
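
// Illustrative note (an assumption based on the array dimensions above, not a
// statement of the actual call sites): the per-mode arrays are indexed by mode
// and reference frame, e.g. a single-reference inter mode would typically be
// read as
//   search_state.frame_mv[this_mode][ref_frame]
//   search_state.vars[INTER_OFFSET(this_mode)][ref_frame]
// where INTER_OFFSET() maps NEARESTMV..NEWMV to 0..RTC_INTER_MODES - 1.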

static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
                                                          2, 2, 3, 3, 3, 4,
                                                          4, 4, 5, 5 };
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
                                                           2, 3, 2, 3, 4, 3,
                                                           4, 5, 4, 5 };

static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
                                                   SMOOTH_PRED };

static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
                                                   NEWMV };

static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
};
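
// Illustrative lookup sketch (not taken from this file): the first row of
// mode_idx[] is indexed by intra mode position (INTRA_FRAME == 0), and the
// remaining rows by reference frame and inter mode offset, e.g.
//   const THR_MODES thr = mode_idx[LAST_FRAME][INTER_OFFSET(NEWMV)];
// would yield THR_NEWMV, assuming INTER_OFFSET() from av1/common/enums.h.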

// GLOBALMV in the set below is in fact ZEROMV, since global motion estimation
// is not done in RT mode.
static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
  { LAST_FRAME, GLOBALMV },    { LAST_FRAME, NEWMV },
  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  { GOLDEN_FRAME, GLOBALMV },  { GOLDEN_FRAME, NEWMV },
  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
  { ALTREF_FRAME, GLOBALMV },  { ALTREF_FRAME, NEWMV },
};

static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
};

static const int_interpfilters filters_ref_set[9] = {
  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
};

enum {
  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
  INTER_NEAREST = (1 << NEARESTMV),
  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};
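
// Illustrative usage sketch (the enum names are real; the test below is just
// an example): these values are bitmasks over PREDICTION_MODE, so a
// membership test looks like
//   if (INTER_NEAREST_NEAR & (1 << this_mode)) { /* NEARESTMV or NEARMV */ }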

// The original scan order (default_scan_8x8) is modified according to the
// extra transpose in the Hadamard C implementations, i.e., aom_hadamard_lp_8x8_c
// and aom_hadamard_8x8_c.
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
};

// The original scan order (av1_default_iscan_8x8) is modified to match the
// Hadamard AVX2 implementations, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. Because the AVX2 implementations reorder the
// coefficients, the normal scan order is no longer guaranteed to visit the
// low coefficients first, so the scan order is modified accordingly.
// Note that this table has to be used together with
// default_scan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_8x8_transpose[64]) = {
  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
};
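
// Sanity-check sketch (illustrative, not part of the library): the two tables
// above remain a scan/inverse-scan pair, i.e. for every rank i,
// av1_default_iscan_8x8_transpose[default_scan_8x8_transpose[i]] == i:
//   for (int i = 0; i < 64; ++i)
//     assert(av1_default_iscan_8x8_transpose[default_scan_8x8_transpose[i]] ==
//            i);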

// The original scan order (default_scan_16x16) is modified according to the
// extra transpose in the Hadamard C implementation in the lp case, i.e.,
// aom_hadamard_lp_16x16_c.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_lp_16x16_transpose[256]) = {
  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (default_scan_16x16) is modified according to the
// extra shift in the Hadamard C implementation in the fp case, i.e.,
// aom_hadamard_16x16_c. Note that the 16x16 lp and fp Hadamard transforms
// generate different outputs, so they are handled separately.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif

// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
// Because the AVX2 implementation reorders the coefficients, the normal scan
// order is no longer guaranteed to visit the low coefficients first, so the
// scan order is modified accordingly. Note that this table has to be used
// together with default_scan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_lp_16x16_transpose[256]) = {
  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
// Because the AVX2 implementation reorders the coefficients, the normal scan
// order is no longer guaranteed to visit the low coefficients first, so the
// scan order is modified accordingly. Note that this table has to be used
// together with default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif

// For entropy coding, IDTX shares the scan orders of the other 2D transforms,
// but the fastest way to calculate the IDTX transform (i.e., no transposes)
// results in coefficients that are a transposition of the entropy-coding
// versions. These tables are used as a substitute for the scan order for the
// faster version of IDTX.

// Must be used together with av1_fast_idtx_iscan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
                                                9, 12, 13, 10, 7, 11, 14, 15 };

// Must be used together with av1_fast_idtx_scan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
                                                 3, 8, 11, 13, 9, 10, 14, 15 };

static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
};
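
// Usage sketch (illustrative; eob and coeff[] are hypothetical): a SCAN_ORDER
// pairs the forward scan with its inverse, so coefficients can be visited in
// coding order via the scan member:
//   const SCAN_ORDER *so = &av1_fast_idtx_scan_order_4x4;
//   for (int idx = 0; idx < eob; ++idx) {
//     const int coeff_pos = so->scan[idx];
//     /* ... process coeff[coeff_pos] ... */
//   }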

// Must be used together with av1_fast_idtx_iscan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};

// Must be used together with av1_fast_idtx_scan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
  0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
  3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
};

static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
};

// Must be used together with av1_fast_idtx_iscan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
  0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
  5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
  37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
  9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
  85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
  12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
  224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
  15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
  31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
  170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
  255
};

// Must be used together with av1_fast_idtx_scan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
  0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
  120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
  121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
  122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
  123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
  124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
  125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
  126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
  127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
  128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
  255
};

// Indicates the blocks for which the RD model should be based on special
// logic.
static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                                    BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int large_block = bsize >= BLOCK_32X32;
  // Only enable for low bitdepth to mitigate issue: b/303023614.
  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
         !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
         cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
}
/*!\brief Finds predicted motion vectors for a block.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Finds predicted motion vectors for a block from a certain reference frame.
 * First it fills the reference MV stack, then picks the best MV from the
 * stack and predicts the final MV for the block for each mode.
 * \param[in]    cpi                      Top-level encoder structure
 * \param[in]    x                        Pointer to structure holding all the
 *                                        data for the current macroblock
 * \param[in]    ref_frame                Reference frame for which to find
 *                                        ref MVs
 * \param[out]   frame_mv                 Predicted MVs for a block
 * \param[in]    yv12_mb                  Buffer to hold predicted block
 * \param[in]    bsize                    Current block size
 * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
 *                                        prune for low temporal variance block
 * \param[in]    skip_pred_mv             Flag indicating whether to skip
 *                                        av1_mv_pred
 * \param[out]   use_scaled_ref_frame     Flag indicating whether a scaled
 *                                        reference frame is used
 *
 * \remark Nothing is returned. Instead, predicted MVs are placed into the
 * \c frame_mv array, and \c use_scaled_ref_frame is set.
 */
static inline void find_predictors(
    AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
    struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
    int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
  const bool ref_is_scaled =
      ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
  const YV12_BUFFER_CONFIG *scaled_ref =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 =
      ref_is_scaled && scaled_ref ? scaled_ref : ref;
  const int num_planes = av1_num_planes(cm);
  x->pred_mv_sad[ref_frame] = INT_MAX;
  x->pred_mv0_sad[ref_frame] = INT_MAX;
  x->pred_mv1_sad[ref_frame] = INT_MAX;
  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  // TODO(kyslov): This needs various further optimizations. To be continued.
  assert(yv12 != NULL);
  if (yv12 != NULL) {
    struct scale_factors *const sf =
        scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                     mbmi_ext->mode_context);
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    av1_find_best_ref_mvs_from_stack(
        cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
        &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
    frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
    // Early exit for non-LAST frame if force_skip_low_temp_var is set.
    if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
        !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
      av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                  bsize);
    }
  }
  if (cm->features.switchable_motion_mode) {
    av1_count_overlappable_neighbors(cm, xd);
  }
  mbmi->num_proj_ref = 1;
  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
}

static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
                                   PREDICTION_MODE pred_mode,
                                   MV_REFERENCE_FRAME ref_frame0,
                                   MV_REFERENCE_FRAME ref_frame1,
                                   const AV1_COMMON *cm) {
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  mbmi->ref_mv_idx = 0;
  mbmi->mode = pred_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame0;
  mbmi->ref_frame[1] = ref_frame1;
  pmi->palette_size[PLANE_TYPE_Y] = 0;
  pmi->palette_size[PLANE_TYPE_UV] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 1;
  mbmi->interintra_mode = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);
}

static inline void init_estimate_block_intra_args(
    struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
  args->cpi = cpi;
  args->x = x;
  args->mode = DC_PRED;
  args->skippable = 1;
  args->rdc = 0;
  args->best_sad = UINT_MAX;
  args->prune_mode_based_on_sad = false;
  args->prune_palette_sad = false;
}

static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
    if (!p[buf_idx].in_use) {
      p[buf_idx].in_use = 1;
      return buf_idx;
    }
  }
  return -1;
}

static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
                                               unsigned int source_variance) {
  return (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
          cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
          cpi->sf.rt_sf.rc_compute_spatial_var_sc &&
          cpi->rc.frame_spatial_variance < 1200 &&
          cpi->rc.perc_spatial_flat_blocks < 5 &&
          cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000);
}

static inline void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL) p->in_use = 0;
}
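
// Usage sketch (illustrative; tmp_buffers is hypothetical): get_pred_buffer()
// and free_pred_buffer() implement a simple acquire/release pattern over a
// small pool of prediction buffers whose in_use flags start at 0.
//   PRED_BUFFER tmp_buffers[4];  // data, stride, in_use initialized elsewhere
//   const int buf_idx = get_pred_buffer(tmp_buffers, 4);
//   if (buf_idx >= 0) {
//     /* ... write the predicted block into tmp_buffers[buf_idx].data ... */
//     free_pred_buffer(&tmp_buffers[buf_idx]);
//   }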

#if CONFIG_INTERNAL_STATS
static inline void store_coding_context_nonrd(MACROBLOCK *x,
                                              PICK_MODE_CONTEXT *ctx,
                                              int mode_index) {
#else
static inline void store_coding_context_nonrd(MACROBLOCK *x,
                                              PICK_MODE_CONTEXT *ctx) {
#endif  // CONFIG_INTERNAL_STATS
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way.
  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;

  ctx->skippable = txfm_info->skip_txfm;
#if CONFIG_INTERNAL_STATS
  ctx->best_mode_index = mode_index;
#endif  // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}

void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
                   BLOCK_SIZE bsize, TX_SIZE tx_size);

void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
                        int pred_stride, RD_STATS *this_rdc, int *skippable,
                        BLOCK_SIZE bsize, TX_SIZE tx_size);

int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               RD_STATS *this_rdc, int start_plane,
                               int stop_plane);

void av1_estimate_block_intra(int plane, int block, int row, int col,
                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                              void *arg);

void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                             int best_early_term, unsigned int ref_cost_intra,
                             int reuse_prediction, struct buf_2d *orig_dst,
                             PRED_BUFFER *tmp_buffers,
                             PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
                             BEST_PICKMODE *best_pickmode,
                             PICK_MODE_CONTEXT *ctx,
                             unsigned int *best_sad_norm);

#endif  // AOM_AV1_ENCODER_NONRD_OPT_H_