• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
14 
15 #include "av1/encoder/context_tree.h"
16 #include "av1/encoder/rdopt_utils.h"
17 #include "av1/encoder/rdopt.h"
18 
// First dimension of the per-mode arrays in InterModeSearchStateNonrd
// (uv_dist, vars, single_inter_mode_costs).
#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
// Larger of the two counts above; second dimension of mode_idx[][].
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
// Scales an RD cost by 7/8 with integer arithmetic: multiply by 7, then shift
// right by 3.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)
// Number of entries in comp_ref_mode_set[].
#define NUM_COMP_INTER_MODES_RT (6)
// Number of entries in ref_mode_set[].
#define NUM_INTER_MODES 12
// Non-zero when the tx mode search type is not ONLY_4X4 and the block size is
// larger than BLOCK_32X32.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  ((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32)
// Presumably the transform size applied when CAP_TX_SIZE_FOR_BSIZE_GT32() is
// true -- confirm against the users of this macro.
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif

// Declared here and defined in another translation unit; its purpose is not
// visible from this header.
extern int g_pick_inter_mode_cnt;
34 /*!\cond */
// One prediction buffer slot. Slots are claimed by get_pred_buffer() and
// released by free_pred_buffer() through the in_use flag.
typedef struct {
  uint8_t *data;  // Pointer to the buffer storage.
  int stride;     // Stride of the buffer (presumably in samples per row).
  int in_use;     // Non-zero while the slot is claimed.
} PRED_BUFFER;
40 
// Record of the best mode found so far. An instance is embedded in
// InterModeSearchStateNonrd as best_pickmode. Field meanings below are
// inferred from the field names -- confirm against the code that fills them.
typedef struct {
  // Prediction buffer associated with the best mode (may be shared with the
  // PRED_BUFFER pool -- TODO confirm ownership).
  PRED_BUFFER *best_pred;
  PREDICTION_MODE best_mode;
  TX_SIZE best_tx_size;
  TX_TYPE tx_type;
  // Reference frame(s) of the best mode.
  MV_REFERENCE_FRAME best_ref_frame;
  MV_REFERENCE_FRAME best_second_ref_frame;
  uint8_t best_mode_skip_txfm;
  uint8_t best_mode_initial_skip_flag;
  int_interpfilters best_pred_filter;
  MOTION_MODE best_motion_mode;
  WarpedMotionParams wm_params;
  int num_proj_ref;
  PALETTE_MODE_INFO pmi;
  int64_t best_sse;
} BEST_PICKMODE;
57 
// A (reference frame, prediction mode) pair; element type of ref_mode_set[].
typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;
62 
// A pair of reference frames together with a prediction mode; element type of
// comp_ref_mode_set[].
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
  PREDICTION_MODE pred_mode;
} COMP_REF_MODE;
67 
// Argument bundle for intra block estimation. Every field is reset by
// init_estimate_block_intra_args(); presumably passed as the `void *arg` of
// av1_estimate_block_intra() -- confirm against the callers.
struct estimate_block_intra_args {
  AV1_COMP *cpi;         // Encoder instance.
  MACROBLOCK *x;         // Current macroblock.
  PREDICTION_MODE mode;  // Reset to DC_PRED by the initializer.
  int skippable;         // Reset to 1 by the initializer.
  RD_STATS *rdc;         // Reset to a null pointer by the initializer.
  unsigned int best_sad;  // Reset to UINT_MAX by the initializer.
  bool prune_mode_based_on_sad;  // Reset to false by the initializer.
  bool prune_palette_sad;        // Reset to false by the initializer.
};
78 /*!\endcond */
79 
/*!\brief Structure to store parameters and statistics used in non-rd inter mode
 * evaluation.
 */
typedef struct {
  //! Structure to hold best inter mode data
  BEST_PICKMODE best_pickmode;
  //! Structure to hold the RD cost of the current mode
  RD_STATS this_rdc;
  //! RD cost of the best mode found so far (stored by value)
  RD_STATS best_rdc;
  //! Distortion of chroma planes for all modes and reference frames
  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
  //! Buffer to hold predicted block for all reference frames and planes
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  //! Array to hold variance of all modes and reference frames
  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
  //! Array to hold ref cost of single reference mode for all ref frames
  unsigned int ref_costs_single[REF_FRAMES];
  //! Array to hold motion vector for all modes and reference frames
  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold best mv for all modes and reference frames
  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold inter mode cost of single ref mode for all ref frames
  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
  //! Array to hold use reference frame mask for each reference frame
  int use_ref_frame_mask[REF_FRAMES];
  //! Array to hold flags of evaluated modes for each reference frame
  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold flag indicating if scaled reference frame is used.
  bool use_scaled_ref_frame[REF_FRAMES];
} InterModeSearchStateNonrd;
111 
// Per-block-size log2 width and height tables with BLOCK_SIZES entries each.
// Presumably indexed by BLOCK_SIZE and expressed in units of 4 samples (the
// first entry of both tables is 0) -- TODO confirm against the BLOCK_SIZE enum.
static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
                                                          2, 2, 3, 3, 3, 4,
                                                          4, 4, 5, 5 };
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
                                                           2, 3, 2, 3, 4, 3,
                                                           4, 5, 4, 5 };
118 
// The four intra prediction modes listed for this header's users
// (RTC_INTRA_MODES is also 4).
static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
                                                   SMOOTH_PRED };

// The four single-reference inter prediction modes listed for this header's
// users (RTC_INTER_MODES is also 4).
static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
                                                   NEWMV };
124 
// Maps [reference frame][mode slot] to a THR_MODES value. The first row holds
// the intra entries; each following row holds the NEAREST/NEAR/GLOBAL/NEW
// entries of one reference frame. The column order presumably matches
// intra_mode_list[] / inter_mode_list[] and the row order the
// MV_REFERENCE_FRAME enum -- TODO confirm.
static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
};
135 
// Single-reference (reference frame, mode) combinations: the four inter modes
// for each of LAST_FRAME, GOLDEN_FRAME and ALTREF_FRAME, in that order.
// GLOBALMV in the set below is in fact ZEROMV, as we don't do global ME in RT
// mode.
static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
  { LAST_FRAME, GLOBALMV },    { LAST_FRAME, NEWMV },
  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  { GOLDEN_FRAME, GLOBALMV },  { GOLDEN_FRAME, NEWMV },
  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
  { ALTREF_FRAME, GLOBALMV },  { ALTREF_FRAME, NEWMV },
};
146 
// Compound (two-reference) combinations: LAST_FRAME paired with GOLDEN_FRAME,
// LAST2_FRAME and ALTREF_FRAME, each with GLOBAL_GLOBALMV and
// NEAREST_NEARESTMV.
static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
};
155 
// All nine pairings of EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH and MULTITAP_SHARP
// for the two members of int_interpfilters::as_filters. Which member is the
// horizontal and which the vertical filter is not visible here -- confirm
// against the InterpFilters definition.
static const int_interpfilters filters_ref_set[9] = {
  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
};
167 
// Bit masks naming subsets of the single-reference inter prediction modes;
// each mode contributes the bit (1 << mode).
enum {
  //  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
  INTER_NEAREST = (1 << NEARESTMV),
  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};
175 
// The original scan order (default_scan_8x8) is modified according to the extra
// transpose in the hadamard C implementation, i.e., aom_hadamard_lp_8x8_c and
// aom_hadamard_8x8_c.
// Must be used together with av1_default_iscan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
  0,  8,  1,  2,  9,  16, 24, 17, 10, 3,  4,  11, 18, 25, 32, 40,
  33, 26, 19, 12, 5,  6,  13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14, 7,  15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
};
185 
// The original scan order (av1_default_iscan_8x8) is modified to match the
// hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. The hadamard AVX2 implementation changes the order of
// the coefficients, so the normal scan order is no longer guaranteed to scan
// low coefficients first; the scan order is modified accordingly.
// Note that this one has to be used together with default_scan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_8x8_transpose[64]) = {
  0,  2,  3,  9,  10, 20, 21, 35, 1,  4,  8,  11, 19, 22, 34, 36,
  5,  7,  12, 18, 23, 33, 37, 48, 6,  13, 17, 24, 32, 38, 47, 49,
  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
};
200 
// The original scan order (default_scan_16x16) is modified according to the
// extra transpose in the hadamard C implementation for the lp case, i.e.,
// aom_hadamard_lp_16x16_c.
// Must be used together with av1_default_iscan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_lp_16x16_transpose[256]) = {
  0,   8,   2,   4,   10,  16,  24,  18,  12,  6,   64,  14,  20,  26,  32,
  40,  34,  28,  22,  72,  66,  68,  74,  80,  30,  36,  42,  48,  56,  50,
  44,  38,  88,  82,  76,  70,  128, 78,  84,  90,  96,  46,  52,  58,  1,
  9,   3,   60,  54,  104, 98,  92,  86,  136, 130, 132, 138, 144, 94,  100,
  106, 112, 62,  5,   11,  17,  25,  19,  13,  7,   120, 114, 108, 102, 152,
  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65,  15,  21,  27,
  33,  41,  35,  29,  23,  73,  67,  124, 118, 168, 162, 156, 150, 200, 194,
  196, 202, 208, 158, 164, 170, 176, 126, 69,  75,  81,  31,  37,  43,  49,
  57,  51,  45,  39,  89,  83,  77,  71,  184, 178, 172, 166, 216, 210, 204,
  198, 206, 212, 218, 224, 174, 180, 186, 129, 79,  85,  91,  97,  47,  53,
  59,  61,  55,  105, 99,  93,  87,  137, 131, 188, 182, 232, 226, 220, 214,
  222, 228, 234, 240, 190, 133, 139, 145, 95,  101, 107, 113, 63,  121, 115,
  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
  255
};
225 
#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (default_scan_16x16) is modified according to the
// extra shift in the hadamard C implementation for the fp case, i.e.,
// aom_hadamard_16x16_c. Note that the 16x16 lp and fp hadamard generate
// different outputs, so they are handled separately.
// Must be used together with av1_default_iscan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0,   4,   2,   8,   6,   16,  20,  18,  12,  10,  64,  14,  24,  22,  32,
  36,  34,  28,  26,  68,  66,  72,  70,  80,  30,  40,  38,  48,  52,  50,
  44,  42,  84,  82,  76,  74,  128, 78,  88,  86,  96,  46,  56,  54,  1,
  5,   3,   60,  58,  100, 98,  92,  90,  132, 130, 136, 134, 144, 94,  104,
  102, 112, 62,  9,   7,   17,  21,  19,  13,  11,  116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65,  15,  25,  23,
  33,  37,  35,  29,  27,  69,  67,  124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73,  71,  81,  31,  41,  39,  49,
  53,  51,  45,  43,  85,  83,  77,  75,  180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79,  89,  87,  97,  47,  57,
  55,  61,  59,  101, 99,  93,  91,  133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95,  105, 103, 113, 63,  117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif
253 
// The original scan order (av1_default_iscan_16x16) is modified to match the
// hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
// The hadamard AVX2 implementation changes the order of the coefficients, so
// the normal scan order is no longer guaranteed to scan low coefficients
// first; the scan order is modified accordingly. Note that this one has to be
// used together with default_scan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_lp_16x16_transpose[256]) = {
  0,   44,  2,   46,  3,   63,  9,   69,  1,   45,  4,   64,  8,   68,  11,
  87,  5,   65,  7,   67,  12,  88,  18,  94,  6,   66,  13,  89,  17,  93,
  24,  116, 14,  90,  16,  92,  25,  117, 31,  123, 15,  91,  26,  118, 30,
  122, 41,  148, 27,  119, 29,  121, 42,  149, 48,  152, 28,  120, 43,  150,
  47,  151, 62,  177, 10,  86,  20,  96,  21,  113, 35,  127, 19,  95,  22,
  114, 34,  126, 37,  144, 23,  115, 33,  125, 38,  145, 52,  156, 32,  124,
  39,  146, 51,  155, 58,  173, 40,  147, 50,  154, 59,  174, 73,  181, 49,
  153, 60,  175, 72,  180, 83,  198, 61,  176, 71,  179, 84,  199, 98,  202,
  70,  178, 85,  200, 97,  201, 112, 219, 36,  143, 54,  158, 55,  170, 77,
  185, 53,  157, 56,  171, 76,  184, 79,  194, 57,  172, 75,  183, 80,  195,
  102, 206, 74,  182, 81,  196, 101, 205, 108, 215, 82,  197, 100, 204, 109,
  216, 131, 223, 99,  203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78,  193, 104,
  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
  255
};
281 
#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match the
// hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
// The hadamard AVX2 implementation changes the order of the coefficients, so
// the normal scan order is no longer guaranteed to scan low coefficients
// first; the scan order is modified accordingly. Note that this one has to be
// used together with default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0,   44,  2,   46,  1,   45,  4,   64,  3,   63,  9,   69,  8,   68,  11,
  87,  5,   65,  7,   67,  6,   66,  13,  89,  12,  88,  18,  94,  17,  93,
  24,  116, 14,  90,  16,  92,  15,  91,  26,  118, 25,  117, 31,  123, 30,
  122, 41,  148, 27,  119, 29,  121, 28,  120, 43,  150, 42,  149, 48,  152,
  47,  151, 62,  177, 10,  86,  20,  96,  19,  95,  22,  114, 21,  113, 35,
  127, 34,  126, 37,  144, 23,  115, 33,  125, 32,  124, 39,  146, 38,  145,
  52,  156, 51,  155, 58,  173, 40,  147, 50,  154, 49,  153, 60,  175, 59,
  174, 73,  181, 72,  180, 83,  198, 61,  176, 71,  179, 70,  178, 85,  200,
  84,  199, 98,  202, 97,  201, 112, 219, 36,  143, 54,  158, 53,  157, 56,
  171, 55,  170, 77,  185, 76,  184, 79,  194, 57,  172, 75,  183, 74,  182,
  81,  196, 80,  195, 102, 206, 101, 205, 108, 215, 82,  197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78,  193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif
311 
312 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
313 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
314 // results in coefficients that are a transposition of the entropy coding
315 // versions. These tables are used as substitute for the scan order for the
316 // faster version of IDTX.
317 
// Scan table for the fast IDTX path on 4x4 blocks.
// Must be used together with av1_fast_idtx_iscan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_scan_4x4[16]) = { 0, 1,  4,  8,  5, 2,  3,  6,
                                                9, 12, 13, 10, 7, 11, 14, 15 };

// Inverse of the table above: av1_fast_idtx_iscan_4x4[av1_fast_idtx_scan_4x4[i]]
// == i.
// Must be used together with av1_fast_idtx_scan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5,  6,  2, 4,  7,  12,
                                                 3, 8, 11, 13, 9, 10, 14, 15 };

// Pairs the 4x4 scan table with its inverse.
static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
};
331 
// Scan table for the fast IDTX path on 8x8 blocks.
// Must be used together with av1_fast_idtx_iscan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
  0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};

// Inverse-scan companion of the table above.
// Must be used together with av1_fast_idtx_scan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
  0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
  3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
};

// Pairs the 8x8 scan table with its inverse.
static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
};
351 
// Scan table for the fast IDTX path on 16x16 blocks.
// Must be used together with av1_fast_idtx_iscan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
  0,   1,   16,  32,  17,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,
  5,   20,  35,  50,  65,  80,  96,  81,  66,  51,  36,  21,  6,   7,   22,
  37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,  8,
  9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100,
  85,  70,  55,  40,  25,  10,  11,  26,  41,  56,  71,  86,  101, 116, 131,
  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,  42,  27,
  12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208,
  224, 209, 194, 179, 164, 149, 134, 119, 104, 89,  74,  59,  44,  29,  14,
  15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180, 195, 210, 225,
  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,
  31,  47,  62,  77,  92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,  78,  63,  79,  94,
  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
  170, 155, 140, 125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231,
  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
  255
};

// Inverse-scan companion of the table above.
// Must be used together with av1_fast_idtx_scan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
  0,   1,   5,   6,   14,  15,  27,  28,  44,  45,  65,  66,  90,  91,  119,
  120, 2,   4,   7,   13,  16,  26,  29,  43,  46,  64,  67,  89,  92,  118,
  121, 150, 3,   8,   12,  17,  25,  30,  42,  47,  63,  68,  88,  93,  117,
  122, 149, 151, 9,   11,  18,  24,  31,  41,  48,  62,  69,  87,  94,  116,
  123, 148, 152, 177, 10,  19,  23,  32,  40,  49,  61,  70,  86,  95,  115,
  124, 147, 153, 176, 178, 20,  22,  33,  39,  50,  60,  71,  85,  96,  114,
  125, 146, 154, 175, 179, 200, 21,  34,  38,  51,  59,  72,  84,  97,  113,
  126, 145, 155, 174, 180, 199, 201, 35,  37,  52,  58,  73,  83,  98,  112,
  127, 144, 156, 173, 181, 198, 202, 219, 36,  53,  57,  74,  82,  99,  111,
  128, 143, 157, 172, 182, 197, 203, 218, 220, 54,  56,  75,  81,  100, 110,
  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55,  76,  80,  101, 109,
  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77,  79,  102, 108,
  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78,  103, 107,
  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
  255
};
395 
396 // Indicates the blocks for which RD model should be based on special logic
get_model_rd_flag(const AV1_COMP * cpi,const MACROBLOCKD * xd,BLOCK_SIZE bsize)397 static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
398                                     BLOCK_SIZE bsize) {
399   const AV1_COMMON *const cm = &cpi->common;
400   const int large_block = bsize >= BLOCK_32X32;
401   // Only enable for low bitdepth to mitigate issue: b/303023614.
402   return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
403          !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
404          cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
405 }
406 /*!\brief Finds predicted motion vectors for a block.
407  *
408  * \ingroup nonrd_mode_search
409  * \callgraph
410  * \callergraph
411  * Finds predicted motion vectors for a block from a certain reference frame.
412  * First, it fills reference MV stack, then picks the test from the stack and
413  * predicts the final MV for a block for each mode.
414  * \param[in]    cpi                      Top-level encoder structure
415  * \param[in]    x                        Pointer to structure holding all the
416  *                                        data for the current macroblock
417  * \param[in]    ref_frame                Reference frame for which to find
418  *                                        ref MVs
419  * \param[out]   frame_mv                 Predicted MVs for a block
420  * \param[in]    yv12_mb                  Buffer to hold predicted block
421  * \param[in]    bsize                    Current block size
422  * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
423  *                                        prune for low temporal variance block
424  * \param[in]    skip_pred_mv             Flag indicating to skip av1_mv_pred
425  * \param[out]   use_scaled_ref_frame     Flag to indicate if scaled reference
426  *                                        frame is used.
427  *
428  * \remark Nothing is returned. Instead, predicted MVs are placed into
429  * \c frame_mv array, and use_scaled_ref_frame is set.
430  */
find_predictors(AV1_COMP * cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],struct buf_2d yv12_mb[8][MAX_MB_PLANE],BLOCK_SIZE bsize,int force_skip_low_temp_var,int skip_pred_mv,bool * use_scaled_ref_frame)431 static inline void find_predictors(
432     AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
433     int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
434     struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
435     int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
436   AV1_COMMON *const cm = &cpi->common;
437   MACROBLOCKD *const xd = &x->e_mbd;
438   MB_MODE_INFO *const mbmi = xd->mi[0];
439   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
440   const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
441   const bool ref_is_scaled =
442       ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
443   const YV12_BUFFER_CONFIG *scaled_ref =
444       av1_get_scaled_ref_frame(cpi, ref_frame);
445   const YV12_BUFFER_CONFIG *yv12 =
446       ref_is_scaled && scaled_ref ? scaled_ref : ref;
447   const int num_planes = av1_num_planes(cm);
448   x->pred_mv_sad[ref_frame] = INT_MAX;
449   x->pred_mv0_sad[ref_frame] = INT_MAX;
450   x->pred_mv1_sad[ref_frame] = INT_MAX;
451   frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
452   // TODO(kyslov) this needs various further optimizations. to be continued..
453   assert(yv12 != NULL);
454   if (yv12 != NULL) {
455     struct scale_factors *const sf =
456         scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
457     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
458     av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
459                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
460                      mbmi_ext->mode_context);
461     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
462     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
463     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
464     av1_find_best_ref_mvs_from_stack(
465         cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
466         &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
467     frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
468     // Early exit for non-LAST frame if force_skip_low_temp_var is set.
469     if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
470         !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
471       av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
472                   bsize);
473     }
474   }
475   if (cm->features.switchable_motion_mode) {
476     av1_count_overlappable_neighbors(cm, xd);
477   }
478   mbmi->num_proj_ref = 1;
479   *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
480 }
481 
// Resets the mode info of a block for evaluating `pred_mode` with the given
// reference frame pair: the MVs are zeroed, palette and filter-intra are
// switched off, the motion mode is SIMPLE_TRANSLATION, and the interpolation
// filters are set from the frame-level filter in `cm`.
static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
                                   PREDICTION_MODE pred_mode,
                                   MV_REFERENCE_FRAME ref_frame0,
                                   MV_REFERENCE_FRAME ref_frame1,
                                   const AV1_COMMON *cm) {
  // Mode and reference selection.
  mbmi->mode = pred_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame0;
  mbmi->ref_frame[1] = ref_frame1;
  mbmi->ref_mv_idx = 0;

  // Motion state.
  mbmi->mv[1].as_int = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 1;
  mbmi->interintra_mode = 0;

  // Palette and filter-intra off.
  mbmi->palette_mode_info.palette_size[PLANE_TYPE_Y] = 0;
  mbmi->palette_mode_info.palette_size[PLANE_TYPE_UV] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);
}
502 
// Resets every field of `args`: stores the encoder and macroblock pointers and
// sets the remaining fields to their defaults (DC_PRED, skippable, no RD stats
// attached, best SAD at UINT_MAX, both pruning flags off).
static inline void init_estimate_block_intra_args(
    struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
  args->cpi = cpi;
  args->x = x;
  args->mode = DC_PRED;
  args->skippable = 1;
  // rdc is a pointer; use NULL rather than a bare 0.
  args->rdc = NULL;
  args->best_sad = UINT_MAX;
  args->prune_mode_based_on_sad = false;
  args->prune_palette_sad = false;
}
514 
// Claims the first free slot among the `len` buffers at `p`: marks it in use
// and returns its index. Returns -1 when every slot is already in use (or
// when `len` is not positive).
static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
  int slot = 0;
  while (slot < len) {
    PRED_BUFFER *const candidate = &p[slot];
    if (candidate->in_use == 0) {
      candidate->in_use = 1;
      return slot;
    }
    ++slot;
  }
  return -1;
}
524 
// Returns true only when all of the following hold: screen content tuning,
// speed >= 11, high source SAD, spatial variance computation enabled, frame
// spatial variance below 1200, fewer than 5 percent spatially flat blocks,
// more than 98 percent blocks with motion, and source_variance below 4000.
static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
                                               unsigned int source_variance) {
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) return false;
  if (!(cpi->oxcf.speed >= 11)) return false;
  if (!cpi->rc.high_source_sad) return false;
  if (!cpi->sf.rt_sf.rc_compute_spatial_var_sc) return false;
  if (!(cpi->rc.frame_spatial_variance < 1200)) return false;
  if (!(cpi->rc.perc_spatial_flat_blocks < 5)) return false;
  if (!(cpi->rc.percent_blocks_with_motion > 98)) return false;
  return source_variance < 4000;
}
534 
// Releases a slot previously claimed with get_pred_buffer() by clearing its
// in_use flag. A NULL pointer is ignored.
static inline void free_pred_buffer(PRED_BUFFER *p) {
  if (p == NULL) return;
  p->in_use = 0;
}
538 
539 #if CONFIG_INTERNAL_STATS
store_coding_context_nonrd(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index)540 static inline void store_coding_context_nonrd(MACROBLOCK *x,
541                                               PICK_MODE_CONTEXT *ctx,
542                                               int mode_index) {
543 #else
544 static inline void store_coding_context_nonrd(MACROBLOCK *x,
545                                               PICK_MODE_CONTEXT *ctx) {
546 #endif  // CONFIG_INTERNAL_STATS
547   MACROBLOCKD *const xd = &x->e_mbd;
548   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
549 
550   // Take a snapshot of the coding context so it can be
551   // restored if we decide to encode this way
552   ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
553 
554   ctx->skippable = txfm_info->skip_txfm;
555 #if CONFIG_INTERNAL_STATS
556   ctx->best_mode_index = mode_index;
557 #endif  // CONFIG_INTERNAL_STATS
558   ctx->mic = *xd->mi[0];
559   ctx->skippable = txfm_info->skip_txfm;
560   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
561                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
562 }
563 
// The functions below are defined in other translation units; only their
// signatures are visible here, so the notes are inferred from the names and
// parameters -- confirm against the definitions.

// Presumably computes luma RD statistics for a block with the given transform
// size, writing them to this_rdc and the skip decision to *skippable.
void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
                   BLOCK_SIZE bsize, TX_SIZE tx_size);

// Presumably the IDTX variant of av1_block_yrd(), taking the prediction
// buffer and its stride explicitly.
void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
                        int pred_stride, RD_STATS *this_rdc, int *skippable,
                        BLOCK_SIZE bsize, TX_SIZE tx_size);

// Presumably models RD for the planes in the range given by start_plane and
// stop_plane; the meaning of the int64_t return value is not visible here.
int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               RD_STATS *this_rdc, int start_plane,
                               int stop_plane);

// Per-transform-block callback; `arg` is presumably a
// struct estimate_block_intra_args *.
void av1_estimate_block_intra(int plane, int block, int row, int col,
                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                              void *arg);

// Presumably evaluates intra modes for the block and updates best_rdc /
// best_pickmode when an intra mode is better.
void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                             int best_early_term, unsigned int ref_cost_intra,
                             int reuse_prediction, struct buf_2d *orig_dst,
                             PRED_BUFFER *tmp_buffers,
                             PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
                             BEST_PICKMODE *best_pickmode,
                             PICK_MODE_CONTEXT *ctx,
                             unsigned int *best_sad_norm);
588 
589 #endif  // AOM_AV1_ENCODER_NONRD_OPT_H_
590