/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/txfm_common.h"
#include "aom_ports/mem.h"

#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"

#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/intra_mode_search.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/nonrd_opt.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/var_based_part.h"

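// Computes a biased RD cost as 7/8 of the input (7 * rdcost >> 3), i.e.
// 87.5% of it; callers can use this to bias an RD-cost comparison by 12.5%
// in favor of a preferred candidate.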
#define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3)
extern int g_pick_inter_mode_cnt;
/*!\cond */
typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

typedef struct {
  PRED_BUFFER *best_pred;
  PREDICTION_MODE best_mode;
  TX_SIZE best_tx_size;
  TX_TYPE tx_type;
  MV_REFERENCE_FRAME best_ref_frame;
  MV_REFERENCE_FRAME best_second_ref_frame;
  uint8_t best_mode_skip_txfm;
  uint8_t best_mode_initial_skip_flag;
  int_interpfilters best_pred_filter;
  MOTION_MODE best_motion_mode;
  WarpedMotionParams wm_params;
  int num_proj_ref;
  uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE / 4];
  PALETTE_MODE_INFO pmi;
  int64_t best_sse;
} BEST_PICKMODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
  PREDICTION_MODE pred_mode;
} COMP_REF_MODE;

typedef struct {
  InterpFilter filter_x;
  InterpFilter filter_y;
} INTER_FILTER;

/*!\brief Structure to store parameters and statistics used in non-rd inter
 * mode evaluation.
 */
typedef struct {
  BEST_PICKMODE best_pickmode;
  RD_STATS this_rdc;
  RD_STATS best_rdc;
  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
  unsigned int ref_costs_single[REF_FRAMES];
  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
  int use_ref_frame_mask[REF_FRAMES];
  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
} InterModeSearchStateNonrd;
/*!\endcond */

#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_INTER_MODES 12

// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
// mode
static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
  { LAST_FRAME, GLOBALMV },    { LAST_FRAME, NEWMV },
  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  { GOLDEN_FRAME, GLOBALMV },  { GOLDEN_FRAME, NEWMV },
  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
  { ALTREF_FRAME, GLOBALMV },  { ALTREF_FRAME, NEWMV },
};

static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
};

static const INTER_FILTER filters_ref_set[9] = {
  { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR }, { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
  { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH }, { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
  { MULTITAP_SHARP, MULTITAP_SHARP }, { EIGHTTAP_REGULAR, MULTITAP_SHARP },
  { MULTITAP_SHARP, EIGHTTAP_REGULAR }, { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
  { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
};

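// Bit masks over single-reference inter modes; a mode `m` is enabled in a
// mask when `(mask >> m) & 1` is set. For example, INTER_NEAREST_NEW enables
// only NEARESTMV and NEWMV.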
enum {
  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
  INTER_NEAREST = (1 << NEARESTMV),
  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};

// The original scan order (default_scan_8x8) is modified according to the
// extra transpose in the Hadamard C implementations, i.e.,
// aom_hadamard_lp_8x8_c and aom_hadamard_8x8_c.
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
};

// The original scan order (av1_default_iscan_8x8) is modified to match the
// Hadamard AVX2 implementations, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. The AVX2 implementations reorder the coefficients,
// so the normal scan order is no longer guaranteed to scan low-frequency
// coefficients first; the scan order is modified accordingly. Note that this
// table has to be used together with default_scan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_8x8_transpose[64]) = {
  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
};
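
// Each scan/iscan pair is inverse to the other: for every index i,
// av1_default_iscan_8x8_transpose[default_scan_8x8_transpose[i]] == i (e.g.
// default_scan_8x8_transpose[1] == 8 and av1_default_iscan_8x8_transpose[8]
// == 1). The same pairing holds for the 16x16 tables below.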

// The original scan order (default_scan_16x16) is modified according to the
// extra transpose in the Hadamard C implementation in the lp case, i.e.,
// aom_hadamard_lp_16x16_c.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_lp_16x16_transpose[256]) = {
  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (default_scan_16x16) is modified according to the
// extra shift in the Hadamard C implementation in the fp case, i.e.,
// aom_hadamard_16x16_c. Note that the 16x16 lp and fp hadamard generate
// different outputs, so we handle them separately.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif

// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2. The AVX2
// implementation reorders the coefficients, so the normal scan order is no
// longer guaranteed to scan low-frequency coefficients first; the scan order
// is modified accordingly. Note that this table has to be used together with
// default_scan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_lp_16x16_transpose[256]) = {
  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2. The AVX2
// implementation reorders the coefficients, so the normal scan order is no
// longer guaranteed to scan low-frequency coefficients first; the scan order
// is modified accordingly. Note that this table has to be used together with
// default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif

static INLINE int early_term_inter_search_with_sse(int early_term_idx,
                                                   BLOCK_SIZE bsize,
                                                   int64_t this_sse,
                                                   int64_t best_sse,
                                                   PREDICTION_MODE this_mode) {
  // Aggressiveness to terminate inter mode search early is adjusted based on
  // speed and block size.
  static const double early_term_thresh[4][4] = { { 0.65, 0.65, 0.65, 0.7 },
                                                  { 0.6, 0.65, 0.85, 0.9 },
                                                  { 0.5, 0.5, 0.55, 0.6 },
                                                  { 0.6, 0.75, 0.85, 0.85 } };
  static const double early_term_thresh_newmv_nearestmv[4] = { 0.3, 0.3, 0.3,
                                                               0.3 };

  const int size_group = size_group_lookup[bsize];
  assert(size_group < 4);
  assert((early_term_idx > 0) && (early_term_idx < EARLY_TERM_INDICES));
  const double threshold =
      ((early_term_idx == EARLY_TERM_IDX_4) &&
       (this_mode == NEWMV || this_mode == NEARESTMV))
          ? early_term_thresh_newmv_nearestmv[size_group]
          : early_term_thresh[early_term_idx - 1][size_group];

  // Terminate inter mode search early based on best sse so far.
  if ((early_term_idx > 0) && (threshold * this_sse > best_sse)) {
    return 1;
  }
  return 0;
}

static INLINE void init_best_pickmode(BEST_PICKMODE *bp) {
  bp->best_sse = INT64_MAX;
  bp->best_mode = NEARESTMV;
  bp->best_ref_frame = LAST_FRAME;
  bp->best_second_ref_frame = NONE_FRAME;
  bp->best_tx_size = TX_8X8;
  bp->tx_type = DCT_DCT;
  bp->best_pred_filter = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
  bp->best_mode_skip_txfm = 0;
  bp->best_mode_initial_skip_flag = 0;
  bp->best_pred = NULL;
  bp->best_motion_mode = SIMPLE_TRANSLATION;
  bp->num_proj_ref = 0;
  memset(&bp->wm_params, 0, sizeof(bp->wm_params));
  memset(&bp->blk_skip, 0, sizeof(bp->blk_skip));
  memset(&bp->pmi, 0, sizeof(bp->pmi));
}

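// Selects the sub-pel precision at which subpel motion search should stop
// early (FULL_PEL, HALF_PEL, or the default sf->mv_sf.subpel_force_stop),
// based on the full-pel MV magnitude, frame-level motion and block
// complexity.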
static INLINE int subpel_select(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                int_mv *mv, MV ref_mv, FULLPEL_MV start_mv,
                                bool fullpel_performed_well) {
  const int frame_lowmotion = cpi->rc.avg_frame_low_motion;
  // Reduce MV precision for higher int MV value & frame-level motion
  if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion >= 3) {
    int mv_thresh = 4;
    const int is_low_resoln =
        (cpi->common.width * cpi->common.height <= 320 * 240);
    mv_thresh = (bsize > BLOCK_32X32) ? 2 : (bsize > BLOCK_16X16) ? 4 : 6;
    if (frame_lowmotion > 0 && frame_lowmotion < 40) mv_thresh = 12;
    mv_thresh = (is_low_resoln) ? mv_thresh >> 1 : mv_thresh;
    if (abs(mv->as_fullmv.row) >= mv_thresh ||
        abs(mv->as_fullmv.col) >= mv_thresh)
      return HALF_PEL;
  } else if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion >= 1) {
    int mv_thresh;
    const int th_vals[2][3] = { { 4, 8, 10 }, { 4, 6, 8 } };
    const int th_idx = cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion - 1;
    assert(th_idx >= 0 && th_idx < 2);
    if (frame_lowmotion > 0 && frame_lowmotion < 40)
      mv_thresh = 12;
    else
      mv_thresh = (bsize >= BLOCK_32X32)   ? th_vals[th_idx][0]
                  : (bsize >= BLOCK_16X16) ? th_vals[th_idx][1]
                                           : th_vals[th_idx][2];
    if (abs(mv->as_fullmv.row) >= (mv_thresh << 1) ||
        abs(mv->as_fullmv.col) >= (mv_thresh << 1))
      return FULL_PEL;
    else if (abs(mv->as_fullmv.row) >= mv_thresh ||
             abs(mv->as_fullmv.col) >= mv_thresh)
      return HALF_PEL;
  }
  // Reduce MV precision for relatively static (e.g. background), low-complex
  // large areas
  if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 2) {
    const int qband = x->qindex >> (QINDEX_BITS - 2);
    assert(qband < 4);
    if (x->content_state_sb.source_sad_nonrd <= kVeryLowSad &&
        bsize > BLOCK_16X16 && qband != 0) {
      if (x->source_variance < 500)
        return FULL_PEL;
      else if (x->source_variance < 5000)
        return HALF_PEL;
    }
  } else if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 1) {
    if (fullpel_performed_well && ref_mv.row == 0 && ref_mv.col == 0 &&
        start_mv.row == 0 && start_mv.col == 0)
      return HALF_PEL;
  }
  return cpi->sf.mv_sf.subpel_force_stop;
}

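// Decides whether the faster, more aggressive subpel search can be used: it
// requires a non-zero quantizer band plus either a good full-pel search
// result, low temporal source SAD, or very low source variance.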
static bool use_aggressive_subpel_search_method(
    MACROBLOCK *x, bool use_adaptive_subpel_search,
    const bool fullpel_performed_well) {
  if (!use_adaptive_subpel_search) return false;
  const int qband = x->qindex >> (QINDEX_BITS - 2);
  assert(qband < 4);
  if ((qband > 0) && (fullpel_performed_well ||
                      (x->content_state_sb.source_sad_nonrd <= kLowSad) ||
                      (x->source_variance < 100)))
    return true;
  return false;
}

/*!\brief Runs Motion Estimation for a specific block and specific ref frame.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Finds the best Motion Vector by running Motion Estimation for a specific
 * block and a specific reference frame. Exits early if the RD Cost of the
 * Full Pel part exceeds the best RD Cost found so far.
 * \param[in]    cpi            Top-level encoder structure
 * \param[in]    x              Pointer to structure holding all the
 *                              data for the current macroblock
 * \param[in]    bsize          Current block size
 * \param[in]    mi_row         Row index in 4x4 units
 * \param[in]    mi_col         Column index in 4x4 units
 * \param[in]    tmp_mv         Pointer to best found New MV
 * \param[in]    rate_mv        Pointer to Rate of the best new MV
 * \param[in]    best_rd_sofar  RD Cost of the best mode found so far
 * \param[in]    use_base_mv    Flag, indicating that tmp_mv holds
 *                              specific MV to start the search with
 *
 * \return Returns 0 if ME was terminated after the Full Pel Search because
 * the RD Cost was too high; otherwise returns 1. The best New MV is placed
 * into \c tmp_mv, and the rate estimate for this vector is placed into
 * \c rate_mv.
 */
static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar, int use_base_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *sf = &cpi->sf;
  MB_MODE_INFO *mi = xd->mi[0];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  int step_param = (sf->rt_sf.fullpel_search_step_param)
                       ? sf->rt_sf.fullpel_search_step_param
                       : cpi->mv_search_params.mv_step_param;
  FULLPEL_MV start_mv;
  const int ref = mi->ref_frame[0];
  const MV ref_mv = av1_get_ref_mv(x, mi->ref_mv_idx).as_mv;
  MV center_mv;
  int dis;
  int rv = 0;
  int cost_list[5];
  int search_subpel = 1;
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
    av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
                         num_planes);
  }

  start_mv = get_fullmv_from_mv(&ref_mv);

  if (!use_base_mv)
    center_mv = ref_mv;
  else
    center_mv = tmp_mv->as_mv;

  const SEARCH_METHODS search_method = sf->mv_sf.search_method;
  const search_site_config *src_search_sites =
      av1_get_search_site_config(cpi, x, search_method);
  FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
  av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
                                     &center_mv, src_search_sites,
                                     /*fine_search_interval=*/0);

  const unsigned int full_var_rd = av1_full_pixel_search(
      start_mv, &full_ms_params, step_param, cond_cost_list(cpi, cost_list),
      &tmp_mv->as_fullmv, NULL);

  // calculate the bit cost on motion vector
  MV mvp_full = get_mv_from_fullmv(&tmp_mv->as_fullmv);

  *rate_mv = av1_mv_bit_cost(&mvp_full, &ref_mv, x->mv_costs->nmv_joint_cost,
                             x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);

  // TODO(kyslov) Account for Rate Mode!
  rv = !(RDCOST(x->rdmult, (*rate_mv), 0) > best_rd_sofar);

  if (rv && search_subpel) {
    SUBPEL_MOTION_SEARCH_PARAMS ms_params;
    av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
                                      cost_list);
    const bool fullpel_performed_well =
        (bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
        (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
        (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127);
    if (sf->rt_sf.reduce_mv_pel_precision_highmotion ||
        sf->rt_sf.reduce_mv_pel_precision_lowcomplex)
      ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv, ref_mv,
                                            start_mv, fullpel_performed_well);

    MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv);
    // adaptively downgrade subpel search method based on block properties
    if (use_aggressive_subpel_search_method(
            x, sf->rt_sf.use_adaptive_subpel_search, fullpel_performed_well))
      av1_find_best_sub_pixel_tree_pruned_more(xd, cm, &ms_params,
                                               subpel_start_mv, &tmp_mv->as_mv,
                                               &dis, &x->pred_sse[ref], NULL);
    else
      cpi->mv_search_params.find_fractional_mv_step(
          xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
          &x->pred_sse[ref], NULL);
    *rate_mv =
        av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs->nmv_joint_cost,
                        x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  }
  // The final MV can not be equal to the reference MV as this will trigger an
  // assert later. This can happen if both NEAREST and NEAR modes were skipped.
  rv = (tmp_mv->as_mv.col != ref_mv.col || tmp_mv->as_mv.row != ref_mv.row);
  return rv;
}

/*!\brief Searches for the best New Motion Vector.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Finds the best Motion Vector by doing Motion Estimation. Uses reduced
 * complexity ME for non-LAST frames, or calls \c combined_motion_search
 * for the LAST reference frame.
 * \param[in]    cpi              Top-level encoder structure
 * \param[in]    x                Pointer to structure holding all the
 *                                data for the current macroblock
 * \param[in]    frame_mv         Array that holds MVs for all modes
 *                                and ref frames
 * \param[in]    ref_frame        Reference frame for which to find
 *                                the best New MVs
 * \param[in]    gf_temporal_ref  Flag, indicating temporal reference
 *                                for GOLDEN frame
 * \param[in]    bsize            Current block size
 * \param[in]    mi_row           Row index in 4x4 units
 * \param[in]    mi_col           Column index in 4x4 units
 * \param[in]    rate_mv          Pointer to Rate of the best new MV
 * \param[in]    best_rdc         Pointer to the RD Cost for the best
 *                                mode found so far
 *
 * \return Returns -1 if the search was not done, otherwise returns 0.
 * The best New MV is placed into the \c frame_mv array, and the rate
 * estimate for this vector is placed into \c rate_mv.
 */
static int search_new_mv(AV1_COMP *cpi, MACROBLOCK *x,
                         int_mv frame_mv[][REF_FRAMES],
                         MV_REFERENCE_FRAME ref_frame, int gf_temporal_ref,
                         BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_mv,
                         RD_STATS *best_rdc) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mi = xd->mi[0];
  AV1_COMMON *cm = &cpi->common;
  if (ref_frame > LAST_FRAME && cpi->oxcf.rc_cfg.mode == AOM_CBR &&
      gf_temporal_ref) {
    int tmp_sad;
    int dis;

    if (bsize < BLOCK_16X16) return -1;

    tmp_sad = av1_int_pro_motion_estimation(
        cpi, x, bsize, mi_row, mi_col,
        &x->mbmi_ext.ref_mv_stack[ref_frame][0].this_mv.as_mv);

    if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1;

    frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
    int_mv best_mv = mi->mv[0];
    best_mv.as_mv.row >>= 3;
    best_mv.as_mv.col >>= 3;
    MV ref_mv = av1_get_ref_mv(x, 0).as_mv;
    frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
    frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;

    SUBPEL_MOTION_SEARCH_PARAMS ms_params;
    av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, NULL);
    if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion ||
        cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex) {
      FULLPEL_MV start_mv = { .row = 0, .col = 0 };
      ms_params.forced_stop =
          subpel_select(cpi, x, bsize, &best_mv, ref_mv, start_mv, false);
    }
    MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
    cpi->mv_search_params.find_fractional_mv_step(
        xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis,
        &x->pred_sse[ref_frame], NULL);
    frame_mv[NEWMV][ref_frame].as_int = best_mv.as_int;

    // When NEWMV is same as ref_mv from the drl, it is preferred to code the
    // MV as NEARESTMV or NEARMV. In this case, NEWMV needs to be skipped to
    // avoid an assert failure at a later stage. The scenario can occur if
    // NEARESTMV was not evaluated for ALTREF.
    if (frame_mv[NEWMV][ref_frame].as_mv.col == ref_mv.col &&
        frame_mv[NEWMV][ref_frame].as_mv.row == ref_mv.row)
      return -1;

    *rate_mv = av1_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, &ref_mv,
                               x->mv_costs->nmv_joint_cost,
                               x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
  } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
                                     &frame_mv[NEWMV][ref_frame], rate_mv,
                                     best_rdc->rdcost, 0)) {
    return -1;
  }

  return 0;
}

static void estimate_single_ref_frame_costs(const AV1_COMMON *cm,
                                            const MACROBLOCKD *xd,
                                            const ModeCosts *mode_costs,
                                            int segment_id, BLOCK_SIZE bsize,
                                            unsigned int *ref_costs_single) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
  } else {
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
    if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
        is_comp_ref_allowed(bsize)) {
      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      base_cost += mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
    }
    ref_costs_single[LAST_FRAME] = base_cost;
    ref_costs_single[GOLDEN_FRAME] = base_cost;
    ref_costs_single[ALTREF_FRAME] = base_cost;
    // add cost for last, golden, altref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[0][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[0][0][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[0][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[0][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[0][2][0];
  }
}

static INLINE void set_force_skip_flag(const AV1_COMP *const cpi,
                                       MACROBLOCK *const x, unsigned int sse,
                                       int *force_skip) {
  if (x->txfm_search_params.tx_mode_search_type == TX_MODE_SELECT &&
      cpi->sf.rt_sf.tx_size_level_based_on_qstep &&
      cpi->sf.rt_sf.tx_size_level_based_on_qstep >= 2) {
    const int qstep = x->plane[0].dequant_QTX[1] >> (x->e_mbd.bd - 5);
    const unsigned int qstep_sq = qstep * qstep;
    // If the sse is low for low source variance blocks, mark those as
    // transform skip.
    // Note: Though qstep_sq is based on ac qstep, the threshold is kept
    // low so that reliable early estimate of tx skip can be obtained
    // through its comparison with sse.
    if (sse < qstep_sq && x->source_variance < qstep_sq &&
        x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0)
      *force_skip = 1;
  }
}

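// For blocks larger than 32x32 the transform size is capped at 16x16 (unless
// only 4x4 transforms are allowed); e.g. a 64x64 block is coded with 16x16
// transforms.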
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)

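// Chooses the transform size for non-RD mode decision from the residual
// variance/SSE and quantizer-based thresholds; may also raise *force_skip
// when the residual is expected to quantize to all zeros.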
static TX_SIZE calculate_tx_size(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                                 MACROBLOCK *const x, unsigned int var,
                                 unsigned int sse, int *force_skip) {
  MACROBLOCKD *const xd = &x->e_mbd;
  TX_SIZE tx_size;
  const TxfmSearchParams *txfm_params = &x->txfm_search_params;
  if (txfm_params->tx_mode_search_type == TX_MODE_SELECT) {
    int multiplier = 8;
    unsigned int var_thresh = 0;
    unsigned int is_high_var = 1;
    // Use quantizer based thresholds to determine transform size.
    if (cpi->sf.rt_sf.tx_size_level_based_on_qstep) {
      const int qband = x->qindex >> (QINDEX_BITS - 2);
      const int mult[4] = { 8, 7, 6, 5 };
      assert(qband < 4);
      multiplier = mult[qband];
      const int qstep = x->plane[0].dequant_QTX[1] >> (xd->bd - 5);
      const unsigned int qstep_sq = qstep * qstep;
      var_thresh = qstep_sq * 2;
      if (cpi->sf.rt_sf.tx_size_level_based_on_qstep >= 2) {
        // If the sse is low for low source variance blocks, mark those as
        // transform skip.
        // Note: Though qstep_sq is based on ac qstep, the threshold is kept
        // low so that reliable early estimate of tx skip can be obtained
        // through its comparison with sse.
        if (sse < qstep_sq && x->source_variance < qstep_sq &&
            x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0)
          *force_skip = 1;
        // Further lower transform size based on aq mode only if residual
        // variance is high.
        is_high_var = (var >= var_thresh);
      }
    }
    // Choose larger transform size for blocks where dc component is dominant
    // or the ac component is low.
    if (sse > ((var * multiplier) >> 2) || (var < var_thresh))
      tx_size =
          AOMMIN(max_txsize_lookup[bsize],
                 tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
        is_high_var)
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size =
        AOMMIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]);
  }

  if (CAP_TX_SIZE_FOR_BSIZE_GT32(txfm_params->tx_mode_search_type, bsize))
    tx_size = TX_SIZE_FOR_BSIZE_GT32;

  return AOMMIN(tx_size, TX_16X16);
}

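// Width/height log2 lookups in units of 4-pel blocks, indexed by block size:
// e.g. BLOCK_64X64 spans 16 four-pel units per side, so both lookups give 4.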
static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
                                                          2, 2, 3, 3, 3, 4,
                                                          4, 4, 5, 5 };
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
                                                           2, 3, 2, 3, 4, 3,
                                                           4, 5, 4, 5 };

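// Computes SSE and sum for the whole block while also recording per-8x8 SSE,
// sum and variance; the caller derives the overall variance as
// sse - sum^2 / N, where N is the pixel count of the block.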
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride, int w, int h,
                           unsigned int *sse, int *sum, int block_size,
                           uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) {
  int k = 0;
  *sse = 0;
  *sum = 0;

  // This function is called for block sizes >= BLOCK_32X32. As per the design,
  // aom_get_var_sse_sum_8x8_quad() processes four 8x8 blocks (in an 8x32 area)
  // per call. Hence the width and height of the block need to be at least 32
  // and 8 samples respectively.
  assert(w >= 32);
  assert(h >= 8);
  for (int i = 0; i < h; i += block_size) {
    for (int j = 0; j < w; j += 32) {
      aom_get_var_sse_sum_8x8_quad(
          src + src_stride * i + j, src_stride, ref + ref_stride * i + j,
          ref_stride, &sse8x8[k], &sum8x8[k], sse, sum, &var8x8[k]);
      k += 4;
    }
  }
}

static void block_variance_16x16_dual(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride, int w,
                                      int h, unsigned int *sse, int *sum,
                                      int block_size, uint32_t *sse16x16,
                                      uint32_t *var16x16) {
  int k = 0;
  *sse = 0;
  *sum = 0;
  // This function is called for block sizes >= BLOCK_32X32. As per the design,
  // aom_get_var_sse_sum_16x16_dual() processes two 16x16 blocks (in a 16x32
  // area) per call. Hence the width and height of the block need to be at
  // least 32 and 16 samples respectively.
  assert(w >= 32);
  assert(h >= 16);
  for (int i = 0; i < h; i += block_size) {
    for (int j = 0; j < w; j += 32) {
      aom_get_var_sse_sum_16x16_dual(src + src_stride * i + j, src_stride,
                                     ref + ref_stride * i + j, ref_stride,
                                     &sse16x16[k], sse, sum, &var16x16[k]);
      k += 2;
    }
  }
}

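// Aggregates the per-unit SSE/sum statistics in 2x2 groups (one output entry
// per block of twice the unit size in each dimension) and derives each
// group's variance as sse - sum^2 / N, with N the pixel count of the group.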
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
                 sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
                 sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >>
                                       (b_width_log2_lookup[unit_size] +
                                        b_height_log2_lookup[unit_size] + 6));
      k++;
    }
  }
}

// Adjust the ac_thr according to speed, width, height and normalized sum
static int ac_thr_factor(const int speed, const int width, const int height,
                         const int norm_sum) {
  if (speed >= 8 && norm_sum < 5) {
    if (width <= 640 && height <= 480)
      return 4;
    else
      return 2;
  }
  return 1;
}

// Sets early_term flag based on chroma planes prediction
static INLINE void set_early_term_based_on_uv_plane(
    AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MACROBLOCKD *xd, int mi_row,
    int mi_col, int *early_term, int num_blk, const unsigned int *sse_tx,
    const unsigned int *var_tx, int sum, unsigned int var, unsigned int sse) {
  AV1_COMMON *const cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  const uint32_t dc_quant = p->dequant_QTX[0];
  const uint32_t ac_quant = p->dequant_QTX[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  int64_t ac_thr = ac_quant * ac_quant >> 6;
  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  int ac_test = 1;
  int dc_test = 1;
  const int norm_sum = abs(sum) >> (bw + bh);

#if CONFIG_AV1_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
      cpi->oxcf.speed > 5)
    ac_thr = av1_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level,
                                     norm_sum, cpi->svc.temporal_layer_id);
  else
    ac_thr *= ac_thr_factor(cpi->oxcf.speed, cm->width, cm->height, norm_sum);
#else
  ac_thr *= ac_thr_factor(cpi->oxcf.speed, cm->width, cm->height, norm_sum);
#endif

  for (int k = 0; k < num_blk; k++) {
    // Check if all ac coefficients can be quantized to zero.
    if (!(var_tx[k] < ac_thr || var == 0)) {
      ac_test = 0;
      break;
    }
    // Check if dc coefficient can be quantized to zero.
    if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
      dc_test = 0;
      break;
    }
  }

  // Check if chroma can be skipped based on ac and dc test flags.
  if (ac_test && dc_test) {
    int skip_uv[2] = { 0 };
    unsigned int var_uv[2];
    unsigned int sse_uv[2];
    // Transform skipping test in UV planes.
    for (int i = 1; i <= 2; i++) {
      int j = i - 1;
      skip_uv[j] = 1;
      if (x->color_sensitivity[j]) {
        skip_uv[j] = 0;
        struct macroblock_plane *const puv = &x->plane[i];
        struct macroblockd_plane *const puvd = &xd->plane[i];
        const BLOCK_SIZE uv_bsize = get_plane_block_size(
            bsize, puvd->subsampling_x, puvd->subsampling_y);
        // Adjust these thresholds for UV.
        const int64_t uv_dc_thr =
            (puv->dequant_QTX[0] * puv->dequant_QTX[0]) >> 3;
        const int64_t uv_ac_thr =
            (puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> 3;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, i,
                                      i);
        var_uv[j] = cpi->ppi->fn_ptr[uv_bsize].vf(puv->src.buf, puv->src.stride,
                                                  puvd->dst.buf,
                                                  puvd->dst.stride, &sse_uv[j]);
        if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
            (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
          skip_uv[j] = 1;
        else
          break;
      }
    }
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }
  }
}
889
calc_rate_dist_block_param(AV1_COMP * cpi,MACROBLOCK * x,RD_STATS * rd_stats,int calculate_rd,int * early_term,BLOCK_SIZE bsize,unsigned int sse)890 static INLINE void calc_rate_dist_block_param(AV1_COMP *cpi, MACROBLOCK *x,
891 RD_STATS *rd_stats,
892 int calculate_rd, int *early_term,
893 BLOCK_SIZE bsize,
894 unsigned int sse) {
895 if (calculate_rd) {
896 if (!*early_term) {
897 const int bw = block_size_wide[bsize];
898 const int bh = block_size_high[bsize];
899
900 model_rd_with_curvfit(cpi, x, bsize, AOM_PLANE_Y, rd_stats->sse, bw * bh,
901 &rd_stats->rate, &rd_stats->dist);
902 }
903
904 if (*early_term) {
905 rd_stats->rate = 0;
906 rd_stats->dist = sse << 4;
907 }
908 }
909 }

static void model_skip_for_sb_y_large_64(AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         int mi_row, int mi_col, MACROBLOCK *x,
                                         MACROBLOCKD *xd, RD_STATS *rd_stats,
                                         int *early_term, int calculate_rd,
                                         int64_t best_sse,
                                         unsigned int *var_output,
                                         unsigned int var_prune_threshold) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  int test_skip = 1;
  unsigned int var;
  int sum;
  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  unsigned int sse16x16[64] = { 0 };
  unsigned int var16x16[64] = { 0 };
  assert(xd->mi[0]->tx_size == TX_16X16);
  assert(bsize > BLOCK_32X32);

  // Calculate variance for whole partition, and also save 16x16 blocks'
  // variance to be used in following transform skipping test.
  block_variance_16x16_dual(p->src.buf, p->src.stride, pd->dst.buf,
                            pd->dst.stride, 4 << bw, 4 << bh, &sse, &sum, 16,
                            sse16x16, var16x16);

  var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
  if (var_output) {
    *var_output = var;
    if (*var_output > var_prune_threshold) {
      return;
    }
  }

  rd_stats->sse = sse;
  // Skipping test
  *early_term = 0;
  set_force_skip_flag(cpi, x, sse, early_term);
  // The code below for setting skip flag assumes transform size of at least
  // 8x8, so force this lower limit on transform.
  MB_MODE_INFO *const mi = xd->mi[0];
  if (!calculate_rd && cpi->sf.rt_sf.sse_early_term_inter_search &&
      early_term_inter_search_with_sse(
          cpi->sf.rt_sf.sse_early_term_inter_search, bsize, sse, best_sse,
          mi->mode))
    test_skip = 0;

  if (*early_term) test_skip = 0;

  // Evaluate if the partition block is a skippable block in Y plane.
  if (test_skip) {
    const unsigned int *sse_tx = sse16x16;
    const unsigned int *var_tx = var16x16;
    const unsigned int num_block = (1 << (bw + bh - 2)) >> 2;
    set_early_term_based_on_uv_plane(cpi, x, bsize, xd, mi_row, mi_col,
                                     early_term, num_block, sse_tx, var_tx, sum,
                                     var, sse);
  }
  calc_rate_dist_block_param(cpi, x, rd_stats, calculate_rd, early_term, bsize,
                             sse);
}

static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
                                      int mi_row, int mi_col, MACROBLOCK *x,
                                      MACROBLOCKD *xd, RD_STATS *rd_stats,
                                      int *early_term, int calculate_rd,
                                      int64_t best_sse,
                                      unsigned int *var_output,
                                      unsigned int var_prune_threshold) {
  if (x->force_zeromv_skip_for_blk) {
    *early_term = 1;
    rd_stats->rate = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    return;
  }

  // For block sizes greater than 32x32, the transform size is always 16x16.
  // This function avoids calling calculate_variance() for tx_size 16x16 cases
  // by directly populating variance at tx_size level from
  // block_variance_16x16_dual() function.
  const TxfmSearchParams *txfm_params = &x->txfm_search_params;
  if (CAP_TX_SIZE_FOR_BSIZE_GT32(txfm_params->tx_mode_search_type, bsize)) {
    xd->mi[0]->tx_size = TX_SIZE_FOR_BSIZE_GT32;
    model_skip_for_sb_y_large_64(cpi, bsize, mi_row, mi_col, x, xd, rd_stats,
                                 early_term, calculate_rd, best_sse, var_output,
                                 var_prune_threshold);
    return;
  }

  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  int test_skip = 1;
  unsigned int var;
  int sum;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  unsigned int sse8x8[256] = { 0 };
  int sum8x8[256] = { 0 };
  unsigned int var8x8[256] = { 0 };
  TX_SIZE tx_size;

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
  var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
  if (var_output) {
    *var_output = var;
    if (*var_output > var_prune_threshold) {
      return;
    }
  }

  rd_stats->sse = sse;
  // Skipping test
  *early_term = 0;
  tx_size = calculate_tx_size(cpi, bsize, x, var, sse, early_term);
  assert(tx_size <= TX_16X16);
  // The code below for setting skip flag assumes transform size of at least
  // 8x8, so force this lower limit on transform.
  if (tx_size < TX_8X8) tx_size = TX_8X8;
  xd->mi[0]->tx_size = tx_size;

  MB_MODE_INFO *const mi = xd->mi[0];
  if (!calculate_rd && cpi->sf.rt_sf.sse_early_term_inter_search &&
      early_term_inter_search_with_sse(
          cpi->sf.rt_sf.sse_early_term_inter_search, bsize, sse, best_sse,
          mi->mode))
    test_skip = 0;

  if (*early_term) test_skip = 0;

  // Evaluate if the partition block is a skippable block in Y plane.
  if (test_skip) {
    unsigned int sse16x16[64] = { 0 };
    int sum16x16[64] = { 0 };
    unsigned int var16x16[64] = { 0 };
    const unsigned int *sse_tx = sse8x8;
    const unsigned int *var_tx = var8x8;
    unsigned int num_blks = 1 << (bw + bh - 2);

    if (tx_size >= TX_16X16) {
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
      sse_tx = sse16x16;
      var_tx = var16x16;
      num_blks = num_blks >> 2;
    }
    set_early_term_based_on_uv_plane(cpi, x, bsize, xd, mi_row, mi_col,
                                     early_term, num_blks, sse_tx, var_tx, sum,
                                     var, sse);
  }
  calc_rate_dist_block_param(cpi, x, rd_stats, calculate_rd, early_term, bsize,
                             sse);
}

static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              RD_STATS *rd_stats, unsigned int *var_out,
                              int calculate_rd, int *early_term) {
  if (x->force_zeromv_skip_for_blk && early_term != NULL) {
    *early_term = 1;
    rd_stats->rate = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
  }

  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  const int ref = xd->mi[0]->ref_frame[0];

  assert(bsize < BLOCK_SIZES_ALL);

  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  unsigned int sse;
  int rate;
  int64_t dist;

  unsigned int var = cpi->ppi->fn_ptr[bsize].vf(
      p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse);
  int force_skip = 0;
  xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, x, var, sse, &force_skip);
  if (var_out) {
    *var_out = var;
  }

  if (calculate_rd && (!force_skip || ref == INTRA_FRAME)) {
    const int bwide = block_size_wide[bsize];
    const int bhigh = block_size_high[bsize];
    model_rd_with_curvfit(cpi, x, bsize, AOM_PLANE_Y, sse, bwide * bhigh, &rate,
                          &dist);
  } else {
    rate = INT_MAX;  // this will be overwritten later with block_yrd
    dist = INT_MAX;
  }
  rd_stats->sse = sse;
  x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);

  if (force_skip && ref > INTRA_FRAME) {
    rate = 0;
    dist = (int64_t)sse << 4;
  }

  assert(rate >= 0);

  rd_stats->skip_txfm = (rate == 0);
  rate = AOMMIN(rate, INT_MAX);
  rd_stats->rate = rate;
  rd_stats->dist = dist;
}

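// Applies the low-precision dual 8x8 Hadamard transform over pairs of
// horizontally adjacent 8x8 blocks; each aom_hadamard_lp_8x8_dual() call
// writes the coefficients of two 8x8 blocks into macroblock_plane::coeff.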
static INLINE void aom_process_hadamard_lp_8x16(MACROBLOCK *x,
                                                int max_blocks_high,
                                                int max_blocks_wide,
                                                int num_4x4_w, int step,
                                                int block_step) {
  struct macroblock_plane *const p = &x->plane[0];
  const int bw = 4 * num_4x4_w;
  const int num_4x4 = AOMMIN(num_4x4_w, max_blocks_wide);
  int block = 0;

  for (int r = 0; r < max_blocks_high; r += block_step) {
    for (int c = 0; c < num_4x4; c += 2 * block_step) {
      const int16_t *src_diff = &p->src_diff[(r * bw + c) << 2];
      int16_t *low_coeff = (int16_t *)p->coeff + BLOCK_OFFSET(block);
      aom_hadamard_lp_8x8_dual(src_diff, (ptrdiff_t)bw, low_coeff);
      block += 2 * step;
    }
  }
}

#define DECLARE_BLOCK_YRD_BUFFERS()                      \
  DECLARE_ALIGNED(64, tran_low_t, dqcoeff_buf[16 * 16]); \
  DECLARE_ALIGNED(64, tran_low_t, qcoeff_buf[16 * 16]);  \
  DECLARE_ALIGNED(64, tran_low_t, coeff_buf[16 * 16]);   \
  uint16_t eob[1];

#define DECLARE_BLOCK_YRD_VARS()                                           \
  /* When is_tx_8x8_dual_applicable is true, we compute the txfm for the  \
   * entire bsize and write macroblock_plane::coeff. So low_coeff is kept \
   * as a non-const so we can reassign it to macroblock_plane::coeff. */  \
  int16_t *low_coeff = (int16_t *)coeff_buf;                               \
  int16_t *const low_qcoeff = (int16_t *)qcoeff_buf;                       \
  int16_t *const low_dqcoeff = (int16_t *)dqcoeff_buf;                     \
  const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; \
  const int diff_stride = bw;

#define DECLARE_LOOP_VARS_BLOCK_YRD() \
  const int16_t *src_diff = &p->src_diff[(r * diff_stride + c) << 2];

#if CONFIG_AV1_HIGHBITDEPTH
#define DECLARE_BLOCK_YRD_HBD_VARS()     \
  tran_low_t *const coeff = coeff_buf;   \
  tran_low_t *const qcoeff = qcoeff_buf; \
  tran_low_t *const dqcoeff = dqcoeff_buf;

static AOM_FORCE_INLINE void update_yrd_loop_vars_hbd(
    MACROBLOCK *x, int *skippable, const int step, const int ncoeffs,
    tran_low_t *const coeff, tran_low_t *const qcoeff,
    tran_low_t *const dqcoeff, RD_STATS *this_rdc, int *eob_cost,
    const int tx_blk_id) {
  const int is_txfm_skip = (ncoeffs == 0);
  *skippable &= is_txfm_skip;
  x->txfm_search_info.blk_skip[tx_blk_id] = is_txfm_skip;
  *eob_cost += get_msb(ncoeffs + 1);

  int64_t dummy;
  if (ncoeffs == 1)
    this_rdc->rate += (int)abs(qcoeff[0]);
  else if (ncoeffs > 1)
    this_rdc->rate += aom_satd(qcoeff, step << 4);

  this_rdc->dist += av1_block_error(coeff, dqcoeff, step << 4, &dummy) >> 2;
}
#endif
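// Accumulates rate (SATD of the quantized coefficients) and distortion for
// one transform block and records its skip flag; `step << 4` is the number
// of coefficients, since `step` counts the 4x4 units of the transform block.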
static AOM_FORCE_INLINE void update_yrd_loop_vars(
    MACROBLOCK *x, int *skippable, const int step, const int ncoeffs,
    int16_t *const low_coeff, int16_t *const low_qcoeff,
    int16_t *const low_dqcoeff, RD_STATS *this_rdc, int *eob_cost,
    const int tx_blk_id) {
  const int is_txfm_skip = (ncoeffs == 0);
  *skippable &= is_txfm_skip;
  x->txfm_search_info.blk_skip[tx_blk_id] = is_txfm_skip;
  *eob_cost += get_msb(ncoeffs + 1);
  if (ncoeffs == 1)
    this_rdc->rate += (int)abs(low_qcoeff[0]);
  else if (ncoeffs > 1)
    this_rdc->rate += aom_satd_lp(low_qcoeff, step << 4);

  this_rdc->dist += av1_block_error_lp(low_coeff, low_dqcoeff, step << 4) >> 2;
}

/*!\brief Calculates RD Cost using Hadamard transform.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Calculates RD Cost using Hadamard transform. For low bit depth this
 * function uses a low-precision (16-bit) set of functions; for high bit
 * depth it uses 32-bit functions.
 * \param[in]    x              Pointer to structure holding all the data for
 *                              the current macroblock
 * \param[in]    this_rdc       Pointer to calculated RD Cost
 * \param[in]    skippable      Pointer to a flag indicating possible tx skip
 * \param[in]    bsize          Current block size
 * \param[in]    tx_size        Transform size
 * \param[in]    is_inter_mode  Flag to indicate inter mode
 *
 * \remark Nothing is returned. Instead, the calculated RD cost is placed in
 * \c this_rdc. The \c skippable flag is set if there are no non-zero
 * quantized coefficients for the Hadamard transform.
 */
static void block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
                      const BLOCK_SIZE bsize, const TX_SIZE tx_size,
                      const int is_inter_mode) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  assert(bsize < BLOCK_SIZES_ALL);
  const int num_4x4_w = mi_size_wide[bsize];
  const int num_4x4_h = mi_size_high[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  const int row_step = step * num_4x4_w >> tx_size;
  int block = 0;
  const int max_blocks_wide =
      num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
  const int max_blocks_high =
      num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
  int eob_cost = 0;
  const int bw = 4 * num_4x4_w;
  const int bh = 4 * num_4x4_h;
  const int use_hbd = is_cur_buf_hbd(xd);
  int num_blk_skip_w = num_4x4_w;
  int sh_blk_skip = 0;
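  // For inter blocks, blk_skip flags are stored on a coarser grid (half the
  // 4x4 resolution in each dimension), so the block-skip row width and index
  // shift are adjusted accordingly.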
  if (is_inter_mode) {
    num_blk_skip_w = num_4x4_w >> 1;
    sh_blk_skip = 1;
  }

#if CONFIG_AV1_HIGHBITDEPTH
  if (use_hbd) {
    aom_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride);
  } else {
    aom_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                       pd->dst.buf, pd->dst.stride);
  }
#else
  aom_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
#endif

  // Keep the intermediate value on the stack here. Writing directly to
  // skippable causes speed regression due to load-and-store issues in
  // update_yrd_loop_vars.
  int temp_skippable = 1;
  this_rdc->dist = 0;
  this_rdc->rate = 0;
  // For block sizes 8x16 or above, Hadamard txfm of two adjacent 8x8 blocks
  // can be done per function call. Hence the call of Hadamard txfm is
  // abstracted here for the specified cases.
  int is_tx_8x8_dual_applicable =
      (tx_size == TX_8X8 && block_size_wide[bsize] >= 16 &&
       block_size_high[bsize] >= 8);

#if CONFIG_AV1_HIGHBITDEPTH
  // As of now, the dual implementation of the Hadamard txfm is available only
  // for low bitdepth.
  if (use_hbd) is_tx_8x8_dual_applicable = 0;
#endif

  if (is_tx_8x8_dual_applicable) {
    aom_process_hadamard_lp_8x16(x, max_blocks_high, max_blocks_wide, num_4x4_w,
                                 step, block_step);
  }

  DECLARE_BLOCK_YRD_BUFFERS()
  DECLARE_BLOCK_YRD_VARS()
#if CONFIG_AV1_HIGHBITDEPTH
  DECLARE_BLOCK_YRD_HBD_VARS()
#else
  (void)use_hbd;
#endif

  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (int r = 0; r < max_blocks_high; r += block_step) {
    for (int c = 0, s = 0; c < max_blocks_wide; c += block_step, s += step) {
      DECLARE_LOOP_VARS_BLOCK_YRD()

      switch (tx_size) {
#if CONFIG_AV1_HIGHBITDEPTH
        case TX_16X16:
          if (use_hbd) {
            aom_hadamard_16x16(src_diff, diff_stride, coeff);
            av1_quantize_fp(coeff, 16 * 16, p->zbin_QTX, p->round_fp_QTX,
                            p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
                            dqcoeff, p->dequant_QTX, eob,
                            // default_scan_fp_16x16_transpose and
                            // av1_default_iscan_fp_16x16_transpose have to be
                            // used together.
                            default_scan_fp_16x16_transpose,
                            av1_default_iscan_fp_16x16_transpose);
          } else {
            aom_hadamard_lp_16x16(src_diff, diff_stride, low_coeff);
            av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX,
                            p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
                            p->dequant_QTX, eob,
                            // default_scan_lp_16x16_transpose and
                            // av1_default_iscan_lp_16x16_transpose have to be
                            // used together.
                            default_scan_lp_16x16_transpose,
                            av1_default_iscan_lp_16x16_transpose);
          }
          break;
        case TX_8X8:
          if (use_hbd) {
            aom_hadamard_8x8(src_diff, diff_stride, coeff);
            av1_quantize_fp(
                coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
                p->quant_shift_QTX, qcoeff, dqcoeff, p->dequant_QTX, eob,
                default_scan_8x8_transpose, av1_default_iscan_8x8_transpose);
          } else {
            if (is_tx_8x8_dual_applicable) {
              // The coeffs are pre-computed for the whole block, so re-assign
              // low_coeff to the appropriate location.
              const int block_offset = BLOCK_OFFSET(block + s);
              low_coeff = (int16_t *)p->coeff + block_offset;
            } else {
              aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
            }
            av1_quantize_lp(
                low_coeff, 8 * 8, p->round_fp_QTX, p->quant_fp_QTX, low_qcoeff,
                low_dqcoeff, p->dequant_QTX, eob,
                // default_scan_8x8_transpose and
                // av1_default_iscan_8x8_transpose have to be used together.
                default_scan_8x8_transpose, av1_default_iscan_8x8_transpose);
          }
          break;
        default:
          assert(tx_size == TX_4X4);
          // In tx_size=4x4 case, aom_fdct4x4 and aom_fdct4x4_lp generate
          // normal coefficients order, so we don't need to change the scan
          // order here.
          if (use_hbd) {
            aom_fdct4x4(src_diff, coeff, diff_stride);
            av1_quantize_fp(coeff, 4 * 4, p->zbin_QTX, p->round_fp_QTX,
                            p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
                            dqcoeff, p->dequant_QTX, eob, scan_order->scan,
                            scan_order->iscan);
          } else {
            aom_fdct4x4_lp(src_diff, low_coeff, diff_stride);
            av1_quantize_lp(low_coeff, 4 * 4, p->round_fp_QTX, p->quant_fp_QTX,
                            low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
                            scan_order->scan, scan_order->iscan);
          }
          break;
#else
        case TX_16X16:
          aom_hadamard_lp_16x16(src_diff, diff_stride, low_coeff);
          av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX, p->quant_fp_QTX,
                          low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
                          default_scan_lp_16x16_transpose,
                          av1_default_iscan_lp_16x16_transpose);
          break;
        case TX_8X8:
          if (is_tx_8x8_dual_applicable) {
            // The coeffs are pre-computed for the whole block, so re-assign
            // low_coeff to the appropriate location.
            const int block_offset = BLOCK_OFFSET(block + s);
            low_coeff = (int16_t *)p->coeff + block_offset;
          } else {
            aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
          }
          av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX, p->quant_fp_QTX,
                          low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
                          default_scan_8x8_transpose,
                          av1_default_iscan_8x8_transpose);
          break;
        default:
          aom_fdct4x4_lp(src_diff, low_coeff, diff_stride);
          av1_quantize_lp(low_coeff, 4 * 4, p->round_fp_QTX, p->quant_fp_QTX,
                          low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
                          scan_order->scan, scan_order->iscan);
          break;
1407 #endif
1408 }
1409 assert(*eob <= 1024);
1410 #if CONFIG_AV1_HIGHBITDEPTH
1411 if (use_hbd)
1412 update_yrd_loop_vars_hbd(x, &temp_skippable, step, *eob, coeff, qcoeff,
1413 dqcoeff, this_rdc, &eob_cost,
1414 (r * num_blk_skip_w + c) >> sh_blk_skip);
1415 else
1416 #endif
1417 update_yrd_loop_vars(x, &temp_skippable, step, *eob, low_coeff,
1418 low_qcoeff, low_dqcoeff, this_rdc, &eob_cost,
1419 (r * num_blk_skip_w + c) >> sh_blk_skip);
1420 }
1421 block += row_step;
1422 }
1423
1424 this_rdc->skip_txfm = *skippable = temp_skippable;
1425 if (this_rdc->sse < INT64_MAX) {
1426 this_rdc->sse = (this_rdc->sse << 6) >> 2;
1427 if (temp_skippable) {
1428 // All coefficients are zero, so distortion equals the (scaled) sse.
1429 this_rdc->dist = this_rdc->sse;
1430 return;
1431 }
1432 }
1433
1434 // If skippable is set, rate gets clobbered later.
1435 this_rdc->rate <<= (2 + AV1_PROB_COST_SHIFT);
1436 this_rdc->rate += (eob_cost << AV1_PROB_COST_SHIFT);
1437 }
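// Editor's note, a worked example of the fixed-point scaling above
// (illustrative, not part of the original source): (sse << 6) >> 2 is
// sse * 16, moving the pixel-domain SSE into the distortion scale used by
// RDCOST(). On the rate side, with AV1_PROB_COST_SHIFT == 9, a coefficient
// rate of 3 and an eob_cost of 2 become
//   (3 << (2 + 9)) + (2 << 9) = 6144 + 1024 = 7168
// in probability-cost units, directly comparable with entropy-coded rates.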
1438
1439 // Explicitly enumerate the cases so the compiler can generate SIMD for the
1440 // function. According to the disassembler, gcc generates SSE code for each of
1441 // the possible block sizes. The hottest case is tx_width 16, which takes up
1442 // about 8% of the self cycles of av1_nonrd_pick_inter_mode_sb. Since
1443 // av1_nonrd_pick_inter_mode_sb takes up about 3% of total encoding time, the
1444 // potential gain from an AVX2 implementation is only 3% * 8% = 0.24% of
1445 // total encoding time.
1446 static AOM_INLINE void scale_square_buf_vals(int16_t *dst, const int tx_width,
1447 const int16_t *src,
1448 const int src_stride) {
1449 #define DO_SCALING \
1450 do { \
1451 for (int idy = 0; idy < tx_width; ++idy) { \
1452 for (int idx = 0; idx < tx_width; ++idx) { \
1453 dst[idy * tx_width + idx] = src[idy * src_stride + idx] * 8; \
1454 } \
1455 } \
1456 } while (0)
1457
1458 if (tx_width == 4) {
1459 DO_SCALING;
1460 } else if (tx_width == 8) {
1461 DO_SCALING;
1462 } else if (tx_width == 16) {
1463 DO_SCALING;
1464 } else {
1465 assert(0);
1466 }
1467
1468 #undef DO_SCALING
1469 }
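// Editor's note (illustrative, not part of the original source): the * 8 in
// DO_SCALING puts identity-transform residuals on the same scale as the
// Hadamard outputs used elsewhere in this file, which are 8 times an
// orthogonal transform (see the comment in model_rd_for_sb_uv() below).
// This lets block_yrd_idtx() reuse av1_quantize_lp() with the same
// round/quant/dequant tables, with no extra rescaling.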
1470
1471 /*!\brief Calculates RD Cost when the block uses Identity transform.
1472 * Note that this function is only for low bit depth encoding, since it
1473 * is called in real-time mode for now, which sets high bit depth to 0:
1474 * -DCONFIG_AV1_HIGHBITDEPTH=0
1475 *
1476 * \ingroup nonrd_mode_search
1477 * \callgraph
1478 * \callergraph
1479 * Calculates RD Cost using the low-precision (16-bit) set of functions,
1480 * as only low bit depth is supported on this path.
1481 * \param[in] x Pointer to structure holding all the data for
1482 the current macroblock
1483 * \param[in] this_rdc Pointer to calculated RD Cost
1484 * \param[in] skippable Pointer to a flag indicating possible tx skip
1485 * \param[in] bsize Current block size
1486 * \param[in] tx_size Transform size
1487 *
1488 * \remark Nothing is returned. Instead, the calculated RD cost is placed in
1489 * \c this_rdc. The \c skippable flag is set if all coefficients are zero.
1490 */
1491 static void block_yrd_idtx(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
1492 const BLOCK_SIZE bsize, const TX_SIZE tx_size) {
1493 MACROBLOCKD *xd = &x->e_mbd;
1494 const struct macroblockd_plane *pd = &xd->plane[0];
1495 struct macroblock_plane *const p = &x->plane[0];
1496 assert(bsize < BLOCK_SIZES_ALL);
1497 const int num_4x4_w = mi_size_wide[bsize];
1498 const int num_4x4_h = mi_size_high[bsize];
1499 const int step = 1 << (tx_size << 1);
1500 const int block_step = (1 << tx_size);
1501 const int max_blocks_wide =
1502 num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
1503 const int max_blocks_high =
1504 num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
1505 int eob_cost = 0;
1506 const int bw = 4 * num_4x4_w;
1507 const int bh = 4 * num_4x4_h;
1508 const int num_blk_skip_w = num_4x4_w >> 1;
1509 const int sh_blk_skip = 1;
1510 // Keep the intermediate value on the stack here. Writing directly to
1511 // skippable causes speed regression due to load-and-store issues in
1512 // update_yrd_loop_vars.
1513 int temp_skippable = 1;
1514 int tx_wd = 0;
1515 switch (tx_size) {
1516 case TX_64X64:
1517 assert(0); // Not implemented
1518 break;
1519 case TX_32X32:
1520 assert(0); // Not used
1521 break;
1522 case TX_16X16: tx_wd = 16; break;
1523 case TX_8X8: tx_wd = 8; break;
1524 default:
1525 assert(tx_size == TX_4X4);
1526 tx_wd = 4;
1527 break;
1528 }
1529 this_rdc->dist = 0;
1530 this_rdc->rate = 0;
1531 aom_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
1532 pd->dst.buf, pd->dst.stride);
1533 // Keep track of the row and column of the blocks we use so that we know
1534 // if we are in the unrestricted motion border.
1535 DECLARE_BLOCK_YRD_BUFFERS()
1536 DECLARE_BLOCK_YRD_VARS()
1537 for (int r = 0; r < max_blocks_high; r += block_step) {
1538 for (int c = 0, s = 0; c < max_blocks_wide; c += block_step, s += step) {
1539 DECLARE_LOOP_VARS_BLOCK_YRD()
1540 scale_square_buf_vals(low_coeff, tx_wd, src_diff, diff_stride);
1541 av1_quantize_lp(low_coeff, tx_wd * tx_wd, p->round_fp_QTX,
1542 p->quant_fp_QTX, low_qcoeff, low_dqcoeff, p->dequant_QTX,
1543 eob, scan_order->scan, scan_order->iscan);
1544 assert(*eob <= 1024);
1545 update_yrd_loop_vars(x, &temp_skippable, step, *eob, low_coeff,
1546 low_qcoeff, low_dqcoeff, this_rdc, &eob_cost,
1547 (r * num_blk_skip_w + c) >> sh_blk_skip);
1548 }
1549 }
1550 this_rdc->skip_txfm = *skippable = temp_skippable;
1551 if (this_rdc->sse < INT64_MAX) {
1552 this_rdc->sse = (this_rdc->sse << 6) >> 2;
1553 if (temp_skippable) {
1554 // All coefficients are zero, so distortion equals the (scaled) sse.
1555 this_rdc->dist = this_rdc->sse;
1556 return;
1557 }
1558 }
1559 // If skippable is set, rate gets clobbered later.
1560 this_rdc->rate <<= (2 + AV1_PROB_COST_SHIFT);
1561 this_rdc->rate += (eob_cost << AV1_PROB_COST_SHIFT);
1562 }
1563
1564 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE pred_mode,
1565 MV_REFERENCE_FRAME ref_frame0,
1566 MV_REFERENCE_FRAME ref_frame1,
1567 const AV1_COMMON *cm) {
1568 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1569 mbmi->ref_mv_idx = 0;
1570 mbmi->mode = pred_mode;
1571 mbmi->uv_mode = UV_DC_PRED;
1572 mbmi->ref_frame[0] = ref_frame0;
1573 mbmi->ref_frame[1] = ref_frame1;
1574 pmi->palette_size[0] = 0;
1575 pmi->palette_size[1] = 0;
1576 mbmi->filter_intra_mode_info.use_filter_intra = 0;
1577 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
1578 mbmi->motion_mode = SIMPLE_TRANSLATION;
1579 mbmi->num_proj_ref = 1;
1580 mbmi->interintra_mode = 0;
1581 set_default_interp_filters(mbmi, cm->features.interp_filter);
1582 }
1583
1584 #if CONFIG_INTERNAL_STATS
1585 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1586 int mode_index) {
1587 #else
1588 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
1589 #endif // CONFIG_INTERNAL_STATS
1590 MACROBLOCKD *const xd = &x->e_mbd;
1591 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1592
1593 // Take a snapshot of the coding context so it can be
1594 // restored if we decide to encode this way
1595 ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
1596
1597 ctx->skippable = txfm_info->skip_txfm;
1598 #if CONFIG_INTERNAL_STATS
1599 ctx->best_mode_index = mode_index;
1600 #endif // CONFIG_INTERNAL_STATS
1601 ctx->mic = *xd->mi[0];
1603 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
1604 av1_ref_frame_type(xd->mi[0]->ref_frame));
1605 }
1606
1607 static int get_pred_buffer(PRED_BUFFER *p, int len) {
1608 for (int i = 0; i < len; i++) {
1609 if (!p[i].in_use) {
1610 p[i].in_use = 1;
1611 return i;
1612 }
1613 }
1614 return -1;
1615 }
1616
1617 static void free_pred_buffer(PRED_BUFFER *p) {
1618 if (p != NULL) p->in_use = 0;
1619 }
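// Editor's usage sketch for the PRED_BUFFER pool (hypothetical caller, not
// part of the original source):
//
//   PRED_BUFFER tmp[3];  // data/stride filled in by the caller
//   const int idx = get_pred_buffer(tmp, 3);
//   if (idx >= 0) {
//     uint8_t *scratch = tmp[idx].data;  // temporary prediction target
//     /* ... build and evaluate a predictor ... */
//     free_pred_buffer(&tmp[idx]);  // mark the slot reusable
//   }
//
// get_pred_buffer() returns -1 when every slot is in use, so callers must
// handle exhaustion.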
1620
1621 static INLINE int get_drl_cost(const PREDICTION_MODE this_mode,
1622 const int ref_mv_idx,
1623 const MB_MODE_INFO_EXT *mbmi_ext,
1624 const int (*const drl_mode_cost0)[2],
1625 int8_t ref_frame_type) {
1626 int cost = 0;
1627 if (this_mode == NEWMV || this_mode == NEW_NEWMV) {
1628 for (int idx = 0; idx < 2; ++idx) {
1629 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1630 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1631 cost += drl_mode_cost0[drl_ctx][ref_mv_idx != idx];
1632 if (ref_mv_idx == idx) return cost;
1633 }
1634 }
1635 return cost;
1636 }
1637
1638 if (have_nearmv_in_inter_mode(this_mode)) {
1639 for (int idx = 1; idx < 3; ++idx) {
1640 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1641 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1642 cost += drl_mode_cost0[drl_ctx][ref_mv_idx != (idx - 1)];
1643 if (ref_mv_idx == (idx - 1)) return cost;
1644 }
1645 }
1646 return cost;
1647 }
1648 return cost;
1649 }
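// Editor's worked example (illustrative, not part of the original source):
// for NEWMV with ref_mv_count[ref_frame_type] == 3 and ref_mv_idx == 1, the
// loop above adds drl_mode_cost0[ctx0][1] at idx = 0 ("keep going past
// index 0"), then drl_mode_cost0[ctx1][0] at idx = 1 ("stop here") and
// returns. Each DRL index thus pays for every earlier continuation bit,
// mirroring how the index is signalled in the bitstream.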
1650
1651 static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
1652 int16_t mode_context) {
1653 if (is_inter_compound_mode(mode)) {
1654 return mode_costs
1655 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
1656 }
1657
1658 int mode_cost = 0;
1659 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
1660
1661 assert(is_inter_mode(mode));
1662
1663 if (mode == NEWMV) {
1664 mode_cost = mode_costs->newmv_mode_cost[mode_ctx][0];
1665 return mode_cost;
1666 } else {
1667 mode_cost = mode_costs->newmv_mode_cost[mode_ctx][1];
1668 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
1669
1670 if (mode == GLOBALMV) {
1671 mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][0];
1672 return mode_cost;
1673 } else {
1674 mode_cost += mode_costs->zeromv_mode_cost[mode_ctx][1];
1675 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
1676 mode_cost += mode_costs->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
1677 return mode_cost;
1678 }
1679 }
1680 }
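// Editor's note (illustrative, not part of the original source): the
// cascade above follows the AV1 single-reference inter mode tree:
//   NEWMV:     newmv_bit = 0
//   GLOBALMV:  newmv_bit = 1, zeromv_bit = 0
//   NEARESTMV: newmv_bit = 1, zeromv_bit = 1, refmv_bit = 0
//   NEARMV:    newmv_bit = 1, zeromv_bit = 1, refmv_bit = 1
// with each bit costed under its own slice of mode_context.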
1681
1682 static void newmv_diff_bias(MACROBLOCKD *xd, PREDICTION_MODE this_mode,
1683 RD_STATS *this_rdc, BLOCK_SIZE bsize, int mv_row,
1684 int mv_col, int speed, uint32_t spatial_variance,
1685 CONTENT_STATE_SB content_state_sb) {
1686 // Bias against MVs associated with NEWMV mode that are very different from
1687 // top/left neighbors.
1688 if (this_mode == NEWMV) {
1689 int al_mv_average_row;
1690 int al_mv_average_col;
1691 int row_diff, col_diff;
1692 int above_mv_valid = 0;
1693 int left_mv_valid = 0;
1694 int above_row = INVALID_MV_ROW_COL, above_col = INVALID_MV_ROW_COL;
1695 int left_row = INVALID_MV_ROW_COL, left_col = INVALID_MV_ROW_COL;
1696 if (bsize >= BLOCK_64X64 && content_state_sb.source_sad_nonrd != kHighSad &&
1697 spatial_variance < 300 &&
1698 (mv_row > 16 || mv_row < -16 || mv_col > 16 || mv_col < -16)) {
1699 this_rdc->rdcost = this_rdc->rdcost << 2;
1700 return;
1701 }
1702 if (xd->above_mbmi) {
1703 above_mv_valid = xd->above_mbmi->mv[0].as_int != INVALID_MV;
1704 above_row = xd->above_mbmi->mv[0].as_mv.row;
1705 above_col = xd->above_mbmi->mv[0].as_mv.col;
1706 }
1707 if (xd->left_mbmi) {
1708 left_mv_valid = xd->left_mbmi->mv[0].as_int != INVALID_MV;
1709 left_row = xd->left_mbmi->mv[0].as_mv.row;
1710 left_col = xd->left_mbmi->mv[0].as_mv.col;
1711 }
1712 if (above_mv_valid && left_mv_valid) {
1713 al_mv_average_row = (above_row + left_row + 1) >> 1;
1714 al_mv_average_col = (above_col + left_col + 1) >> 1;
1715 } else if (above_mv_valid) {
1716 al_mv_average_row = above_row;
1717 al_mv_average_col = above_col;
1718 } else if (left_mv_valid) {
1719 al_mv_average_row = left_row;
1720 al_mv_average_col = left_col;
1721 } else {
1722 al_mv_average_row = al_mv_average_col = 0;
1723 }
1724 row_diff = al_mv_average_row - mv_row;
1725 col_diff = al_mv_average_col - mv_col;
1726 if (row_diff > 80 || row_diff < -80 || col_diff > 80 || col_diff < -80) {
1727 if (bsize >= BLOCK_32X32)
1728 this_rdc->rdcost = this_rdc->rdcost << 1;
1729 else
1730 this_rdc->rdcost = 5 * this_rdc->rdcost >> 2;
1731 }
1732 } else {
1733 // Bias for speed >= 8 for low spatial variance.
1734 if (speed >= 8 && spatial_variance < 150 &&
1735 (mv_row > 64 || mv_row < -64 || mv_col > 64 || mv_col < -64))
1736 this_rdc->rdcost = 5 * this_rdc->rdcost >> 2;
1737 }
1738 }
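// Editor's note (illustrative, not part of the original source): the biases
// in newmv_diff_bias() are plain fixed-point multipliers on rdcost:
//   rdcost << 2     -> 4.0x  (large MV on a big, flat, low-sad block)
//   rdcost << 1     -> 2.0x  (MV far from neighbors, bsize >= BLOCK_32X32)
//   5 * rdcost >> 2 -> 1.25x (the milder cases)
// Inflating rdcost makes the biased mode lose close RD comparisons it
// would otherwise win.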
1739
1740 static int64_t model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
1741 MACROBLOCK *x, MACROBLOCKD *xd,
1742 RD_STATS *this_rdc, int start_plane,
1743 int stop_plane) {
1744 // Note that our transform coeffs are 8 times an orthogonal transform.
1745 // Hence the quantizer step is also 8 times larger. To get the effective
1746 // quantizer we need to divide by 8 before sending to the modeling function.
1747 unsigned int sse;
1748 int rate;
1749 int64_t dist;
1750 int i;
1751 int64_t tot_sse = 0;
1752
1753 this_rdc->rate = 0;
1754 this_rdc->dist = 0;
1755 this_rdc->skip_txfm = 0;
1756
1757 for (i = start_plane; i <= stop_plane; ++i) {
1758 struct macroblock_plane *const p = &x->plane[i];
1759 struct macroblockd_plane *const pd = &xd->plane[i];
1760 const uint32_t dc_quant = p->dequant_QTX[0];
1761 const uint32_t ac_quant = p->dequant_QTX[1];
1762 const BLOCK_SIZE bs = plane_bsize;
1763 unsigned int var;
1764 if (!x->color_sensitivity[i - 1]) continue;
1765
1766 var = cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
1767 pd->dst.stride, &sse);
1768 assert(sse >= var);
1769 tot_sse += sse;
1770
1771 av1_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
1772 dc_quant >> 3, &rate, &dist);
1773
1774 this_rdc->rate += rate >> 1;
1775 this_rdc->dist += dist << 3;
1776
1777 av1_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3,
1778 &rate, &dist);
1779
1780 this_rdc->rate += rate;
1781 this_rdc->dist += dist << 4;
1782 }
1783
1784 if (this_rdc->rate == 0) {
1785 this_rdc->skip_txfm = 1;
1786 }
1787
1788 if (RDCOST(x->rdmult, this_rdc->rate, this_rdc->dist) >=
1789 RDCOST(x->rdmult, 0, tot_sse << 4)) {
1790 this_rdc->rate = 0;
1791 this_rdc->dist = tot_sse << 4;
1792 this_rdc->skip_txfm = 1;
1793 }
1794
1795 return tot_sse;
1796 }
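// Editor's note (illustrative, not part of the original source): the shifts
// in model_rd_for_sb_uv() undo the 8x transform scaling described above:
// dc_quant >> 3 and ac_quant >> 3 divide the quantizer step by 8 before it
// reaches av1_model_rd_from_var_lapndz(), the DC rate is halved (rate >> 1)
// while the AC rate is added in full, and the modeled distortions are
// scaled back into RDCOST units (dist << 3 and dist << 4, matching
// tot_sse << 4). The final comparison against RDCOST(x->rdmult, 0,
// tot_sse << 4) asks whether skipping the transform outright beats coding
// the modeled residual.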
1797
1798 /*!\cond */
1799 struct estimate_block_intra_args {
1800 AV1_COMP *cpi;
1801 MACROBLOCK *x;
1802 PREDICTION_MODE mode;
1803 int skippable;
1804 RD_STATS *rdc;
1805 };
1806 /*!\endcond */
1807
1808 /*!\brief Estimation of RD cost of an intra mode for Non-RD optimized case.
1809 *
1810 * \ingroup nonrd_mode_search
1811 * \callgraph
1812 * \callergraph
1813 * Calculates RD Cost for an intra mode for a single TX block using Hadamard
1814 * transform.
1815 * \param[in] plane Color plane
1816 * \param[in] block Index of a TX block in a prediction block
1817 * \param[in] row Row of a current TX block
1818 * \param[in] col Column of a current TX block
1819 * \param[in] plane_bsize Block size of a current prediction block
1820 * \param[in] tx_size Transform size
1821 * \param[in] arg Pointer to a structure that holds parameters
1822 * for intra mode search
1823 *
1824 * \remark Nothing is returned. Instead, the best mode and the RD cost of the
1825 * best mode are set in \c args->rdc and \c args->mode.
1826 */
1827 static void estimate_block_intra(int plane, int block, int row, int col,
1828 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
1829 void *arg) {
1830 struct estimate_block_intra_args *const args = arg;
1831 AV1_COMP *const cpi = args->cpi;
1832 AV1_COMMON *const cm = &cpi->common;
1833 MACROBLOCK *const x = args->x;
1834 MACROBLOCKD *const xd = &x->e_mbd;
1835 struct macroblock_plane *const p = &x->plane[plane];
1836 struct macroblockd_plane *const pd = &xd->plane[plane];
1837 const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
1838 uint8_t *const src_buf_base = p->src.buf;
1839 uint8_t *const dst_buf_base = pd->dst.buf;
1840 const int64_t src_stride = p->src.stride;
1841 const int64_t dst_stride = pd->dst.stride;
1842 RD_STATS this_rdc;
1843
1844 (void)block;
1845 (void)plane_bsize;
1846
1847 av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
1848 av1_invalid_rd_stats(&this_rdc);
1849
1850 p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
1851 pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
1852
1853 if (plane == 0) {
1854 block_yrd(x, &this_rdc, &args->skippable, bsize_tx,
1855 AOMMIN(tx_size, TX_16X16), 0);
1856 } else {
1857 model_rd_for_sb_uv(cpi, bsize_tx, x, xd, &this_rdc, plane, plane);
1858 }
1859
1860 p->src.buf = src_buf_base;
1861 pd->dst.buf = dst_buf_base;
1862 args->rdc->rate += this_rdc.rate;
1863 args->rdc->dist += this_rdc.dist;
1864 }
1865
1866 static INLINE void update_thresh_freq_fact(AV1_COMP *cpi, MACROBLOCK *x,
1867 BLOCK_SIZE bsize,
1868 MV_REFERENCE_FRAME ref_frame,
1869 THR_MODES best_mode_idx,
1870 PREDICTION_MODE mode) {
1871 const THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
1872 const BLOCK_SIZE min_size = AOMMAX(bsize - 3, BLOCK_4X4);
1873 const BLOCK_SIZE max_size = AOMMIN(bsize + 6, BLOCK_128X128);
1874 for (BLOCK_SIZE bs = min_size; bs <= max_size; bs += 3) {
1875 int *freq_fact = &x->thresh_freq_fact[bs][thr_mode_idx];
1876 if (thr_mode_idx == best_mode_idx) {
1877 *freq_fact -= (*freq_fact >> 4);
1878 } else {
1879 *freq_fact =
1880 AOMMIN(*freq_fact + RD_THRESH_INC,
1881 cpi->sf.inter_sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
1882 }
1883 }
1884 }
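// Editor's worked example (illustrative, not part of the original source):
// update_thresh_freq_fact() is an exponential moving average in integer
// arithmetic. When a mode wins, *freq_fact -= *freq_fact >> 4 multiplies it
// by 15/16 (e.g. 320 -> 300), lowering the effective RD threshold so the
// mode is tried more readily at nearby block sizes. Losing modes climb by
// RD_THRESH_INC per update, capped at
// adaptive_rd_thresh * RD_THRESH_MAX_FACT.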
1885
1886 #if CONFIG_AV1_TEMPORAL_DENOISING
1887 static void av1_pickmode_ctx_den_update(
1888 AV1_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig,
1889 unsigned int ref_frame_cost[REF_FRAMES],
1890 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], int reuse_inter_pred,
1891 BEST_PICKMODE *bp) {
1892 ctx_den->zero_last_cost_orig = zero_last_cost_orig;
1893 ctx_den->ref_frame_cost = ref_frame_cost;
1894 ctx_den->frame_mv = frame_mv;
1895 ctx_den->reuse_inter_pred = reuse_inter_pred;
1896 ctx_den->best_tx_size = bp->best_tx_size;
1897 ctx_den->best_mode = bp->best_mode;
1898 ctx_den->best_ref_frame = bp->best_ref_frame;
1899 ctx_den->best_pred_filter = bp->best_pred_filter;
1900 ctx_den->best_mode_skip_txfm = bp->best_mode_skip_txfm;
1901 }
1902
1903 static void recheck_zeromv_after_denoising(
1904 AV1_COMP *cpi, MB_MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd,
1905 AV1_DENOISER_DECISION decision, AV1_PICKMODE_CTX_DEN *ctx_den,
1906 struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_STATS *best_rdc,
1907 BEST_PICKMODE *best_pickmode, BLOCK_SIZE bsize, int mi_row, int mi_col) {
1908 // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on
1909 // denoised result. Only do this under noise conditions, and if rdcost of
1910 // ZEROMV on original source is not significantly higher than rdcost of best
1911 // mode.
1912 if (cpi->noise_estimate.enabled && cpi->noise_estimate.level > kLow &&
1913 ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) &&
1914 ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) ||
1915 (ctx_den->best_ref_frame == GOLDEN_FRAME &&
1916 cpi->svc.number_spatial_layers == 1 &&
1917 decision == FILTER_ZEROMV_BLOCK))) {
1918 // Check if we should pick ZEROMV on denoised signal.
1919 AV1_COMMON *const cm = &cpi->common;
1920 RD_STATS this_rdc;
1921 const ModeCosts *mode_costs = &x->mode_costs;
1922 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1923 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1924
1925 mi->mode = GLOBALMV;
1926 mi->ref_frame[0] = LAST_FRAME;
1927 mi->ref_frame[1] = NONE_FRAME;
1928 set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE_FRAME);
1929 mi->mv[0].as_int = 0;
1930 mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
1931 xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
1932 av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
1933 unsigned int var;
1934 model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 1, NULL);
1935
1936 const int16_t mode_ctx =
1937 av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
1938 this_rdc.rate += cost_mv_ref(mode_costs, GLOBALMV, mode_ctx);
1939
1940 this_rdc.rate += ctx_den->ref_frame_cost[LAST_FRAME];
1941 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
1942 txfm_info->skip_txfm = this_rdc.skip_txfm;
1943 // Don't switch to ZEROMV if the rdcost for ZEROMV on denoised source
1944 // is higher than best_ref mode (on original source).
1945 if (this_rdc.rdcost > best_rdc->rdcost) {
1946 this_rdc = *best_rdc;
1947 mi->mode = best_pickmode->best_mode;
1948 mi->ref_frame[0] = best_pickmode->best_ref_frame;
1949 set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE_FRAME);
1950 mi->interp_filters = best_pickmode->best_pred_filter;
1951 if (best_pickmode->best_ref_frame == INTRA_FRAME) {
1952 mi->mv[0].as_int = INVALID_MV;
1953 } else {
1954 mi->mv[0].as_int = ctx_den
1955 ->frame_mv[best_pickmode->best_mode]
1956 [best_pickmode->best_ref_frame]
1957 .as_int;
1958 if (ctx_den->reuse_inter_pred) {
1959 xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0];
1960 av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
1961 }
1962 }
1963 mi->tx_size = best_pickmode->best_tx_size;
1964 txfm_info->skip_txfm = best_pickmode->best_mode_skip_txfm;
1965 } else {
1966 ctx_den->best_ref_frame = LAST_FRAME;
1967 *best_rdc = this_rdc;
1968 }
1969 }
1970 }
1971 #endif // CONFIG_AV1_TEMPORAL_DENOISING
1972
1973 #define FILTER_SEARCH_SIZE 2
1974
1975 /*!\brief Searches for the best interpolation filter
1976 *
1977 * \ingroup nonrd_mode_search
1978 * \callgraph
1979 * \callergraph
1980 * Iterates through a subset of possible interpolation filters (EIGHTTAP_REGULAR,
1981 * EIGHTTAP_SMOOTH, MULTITAP_SHARP, depending on FILTER_SEARCH_SIZE) and selects
1982 * the one that gives the lowest RD cost, calculated using the curvfit model.
1983 * Support for dual filters (different filters in the x & y directions) is
1984 * allowed if sf.interp_sf.disable_dual_filter = 0.
1985 *
1986 * \param[in] cpi Top-level encoder structure
1987 * \param[in] x Pointer to structure holding all the
1988 * data for the current macroblock
1989 * \param[in] this_rdc Pointer to calculated RD Cost
1990 * \param[in] inter_pred_params_sr Pointer to structure holding parameters of
1991 inter prediction for single reference
1992 * \param[in] mi_row Row index in 4x4 units
1993 * \param[in] mi_col Column index in 4x4 units
1994 * \param[in] tmp_buffer Pointer to a temporary buffer for
1995 * prediction re-use
1996 * \param[in] bsize Current block size
1997 * \param[in] reuse_inter_pred Flag, indicating prediction re-use
1998 * \param[out] this_mode_pred Pointer to store prediction buffer
1999 * for prediction re-use
2000 * \param[out] this_early_term Flag, indicating that transform can be
2001 * skipped
2002 * \param[out] var The residue variance of the current
2003 * predictor.
2004 * \param[in] use_model_yrd_large Flag, indicating special logic to handle
2005 * large blocks
2006 * \param[in] best_sse Best sse so far.
2007 * \param[in] comp_pred Flag, indicating compound mode.
2008 *
2009 * \remark Nothing is returned. Instead, the calculated RD cost is placed in
2010 * \c this_rdc and the best filter is placed in \c mi->interp_filters. In
2011 * case the \c reuse_inter_pred flag is set, this function also outputs
2012 * \c this_mode_pred. Also \c this_early_term is set if the transform can
2013 * be skipped.
2014 */
2015 static void search_filter_ref(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
2016 InterPredParams *inter_pred_params_sr, int mi_row,
2017 int mi_col, PRED_BUFFER *tmp_buffer,
2018 BLOCK_SIZE bsize, int reuse_inter_pred,
2019 PRED_BUFFER **this_mode_pred,
2020 int *this_early_term, unsigned int *var,
2021 int use_model_yrd_large, int64_t best_sse,
2022 int comp_pred) {
2023 AV1_COMMON *const cm = &cpi->common;
2024 MACROBLOCKD *const xd = &x->e_mbd;
2025 struct macroblockd_plane *const pd = &xd->plane[0];
2026 MB_MODE_INFO *const mi = xd->mi[0];
2027 const int bw = block_size_wide[bsize];
2028 int dim_factor =
2029 (cpi->sf.interp_sf.disable_dual_filter == 0) ? FILTER_SEARCH_SIZE : 1;
2030 RD_STATS pf_rd_stats[FILTER_SEARCH_SIZE * FILTER_SEARCH_SIZE] = { 0 };
2031 TX_SIZE pf_tx_size[FILTER_SEARCH_SIZE * FILTER_SEARCH_SIZE] = { 0 };
2032 PRED_BUFFER *current_pred = *this_mode_pred;
2033 int best_skip = 0;
2034 int best_early_term = 0;
2035 int64_t best_cost = INT64_MAX;
2036 int best_filter_index = -1;
2037
2038 SubpelParams subpel_params;
2039 // Initialize inter prediction params at mode level for single reference
2040 // mode.
2041 if (!comp_pred)
2042 init_inter_mode_params(&mi->mv[0].as_mv, inter_pred_params_sr,
2043 &subpel_params, xd->block_ref_scale_factors[0],
2044 pd->pre->width, pd->pre->height);
2045 for (int i = 0; i < FILTER_SEARCH_SIZE * FILTER_SEARCH_SIZE; ++i) {
2046 int64_t cost;
2047 if (cpi->sf.interp_sf.disable_dual_filter &&
2048 filters_ref_set[i].filter_x != filters_ref_set[i].filter_y)
2049 continue;
2050 mi->interp_filters.as_filters.x_filter = filters_ref_set[i].filter_x;
2051 mi->interp_filters.as_filters.y_filter = filters_ref_set[i].filter_y;
2052 if (!comp_pred)
2053 av1_enc_build_inter_predictor_y_nonrd(xd, inter_pred_params_sr,
2054 &subpel_params);
2055 else
2056 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
2057 unsigned int curr_var = UINT_MAX;
2058 if (use_model_yrd_large)
2059 model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
2060 &pf_rd_stats[i], this_early_term, 1, best_sse,
2061 &curr_var, UINT_MAX);
2062 else
2063 model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], &curr_var, 1, NULL);
2064 pf_rd_stats[i].rate += av1_get_switchable_rate(
2065 x, xd, cm->features.interp_filter, cm->seq_params->enable_dual_filter);
2066 cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
2067 pf_tx_size[i] = mi->tx_size;
2068 if (cost < best_cost) {
2069 *var = curr_var;
2070 best_filter_index = i;
2071 best_cost = cost;
2072 best_skip = pf_rd_stats[i].skip_txfm;
2073 best_early_term = *this_early_term;
2074 if (reuse_inter_pred) {
2075 if (*this_mode_pred != current_pred) {
2076 free_pred_buffer(*this_mode_pred);
2077 *this_mode_pred = current_pred;
2078 }
2079 current_pred = &tmp_buffer[get_pred_buffer(tmp_buffer, 3)];
2080 pd->dst.buf = current_pred->data;
2081 pd->dst.stride = bw;
2082 }
2083 }
2084 }
2085 assert(best_filter_index >= 0 &&
2086 best_filter_index < dim_factor * FILTER_SEARCH_SIZE);
2087 if (reuse_inter_pred && *this_mode_pred != current_pred)
2088 free_pred_buffer(current_pred);
2089
2090 mi->interp_filters.as_filters.x_filter =
2091 filters_ref_set[best_filter_index].filter_x;
2092 mi->interp_filters.as_filters.y_filter =
2093 filters_ref_set[best_filter_index].filter_y;
2094 mi->tx_size = pf_tx_size[best_filter_index];
2095 this_rdc->rate = pf_rd_stats[best_filter_index].rate;
2096 this_rdc->dist = pf_rd_stats[best_filter_index].dist;
2097 this_rdc->sse = pf_rd_stats[best_filter_index].sse;
2098 this_rdc->skip_txfm = (best_skip || best_early_term);
2099 *this_early_term = best_early_term;
2100 if (reuse_inter_pred) {
2101 pd->dst.buf = (*this_mode_pred)->data;
2102 pd->dst.stride = (*this_mode_pred)->stride;
2103 } else if (best_filter_index < dim_factor * FILTER_SEARCH_SIZE - 1) {
2104 if (!comp_pred)
2105 av1_enc_build_inter_predictor_y_nonrd(xd, inter_pred_params_sr,
2106 &subpel_params);
2107 else
2108 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
2109 }
2110 }
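// Editor's note (illustrative, not part of the original source): with
// FILTER_SEARCH_SIZE == 2, search_filter_ref() evaluates up to 2x2 = 4
// (filter_x, filter_y) pairs from filters_ref_set. When
// sf.interp_sf.disable_dual_filter is set, pairs with
// filter_x != filter_y are skipped, so only the matched pairs are built,
// costed with av1_get_switchable_rate(), and compared via RDCOST().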
2111 #if !CONFIG_REALTIME_ONLY
2112 #define MOTION_MODE_SEARCH_SIZE 2
2113
2114 static AOM_INLINE int is_warped_mode_allowed(const AV1_COMP *cpi,
2115 MACROBLOCK *const x,
2116 const MB_MODE_INFO *mbmi) {
2117 const FeatureFlags *const features = &cpi->common.features;
2118 const MACROBLOCKD *xd = &x->e_mbd;
2119
2120 if (cpi->sf.inter_sf.extra_prune_warped) return 0;
2121 if (has_second_ref(mbmi)) return 0;
2122 MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
2123
2124 if (features->switchable_motion_mode) {
2125 // Determine which motion modes to search if more than SIMPLE_TRANSLATION
2126 // is allowed.
2127 last_motion_mode_allowed = motion_mode_allowed(
2128 xd->global_motion, xd, mbmi, features->allow_warped_motion);
2129 }
2130
2131 if (last_motion_mode_allowed == WARPED_CAUSAL) {
2132 return 1;
2133 }
2134
2135 return 0;
2136 }
2137
2138 static void calc_num_proj_ref(AV1_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *mi) {
2139 AV1_COMMON *const cm = &cpi->common;
2140 MACROBLOCKD *const xd = &x->e_mbd;
2141 const FeatureFlags *const features = &cm->features;
2142
2143 mi->num_proj_ref = 1;
2144 WARP_SAMPLE_INFO *const warp_sample_info =
2145 &x->warp_sample_info[mi->ref_frame[0]];
2146 int *pts0 = warp_sample_info->pts;
2147 int *pts_inref0 = warp_sample_info->pts_inref;
2148 MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
2149
2150 if (features->switchable_motion_mode) {
2151 // Determine which motion modes to search if more than SIMPLE_TRANSLATION
2152 // is allowed.
2153 last_motion_mode_allowed = motion_mode_allowed(
2154 xd->global_motion, xd, mi, features->allow_warped_motion);
2155 }
2156
2157 if (last_motion_mode_allowed == WARPED_CAUSAL) {
2158 if (warp_sample_info->num < 0) {
2159 warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
2160 }
2161 mi->num_proj_ref = warp_sample_info->num;
2162 }
2163 }
2164
2165 static void search_motion_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
2166 int mi_row, int mi_col, BLOCK_SIZE bsize,
2167 int *this_early_term, int use_model_yrd_large,
2168 int *rate_mv, int64_t best_sse) {
2169 AV1_COMMON *const cm = &cpi->common;
2170 MACROBLOCKD *const xd = &x->e_mbd;
2171 const FeatureFlags *const features = &cm->features;
2172 MB_MODE_INFO *const mi = xd->mi[0];
2173 RD_STATS pf_rd_stats[MOTION_MODE_SEARCH_SIZE] = { 0 };
2174 int best_skip = 0;
2175 int best_early_term = 0;
2176 int64_t best_cost = INT64_MAX;
2177 int best_mode_index = -1;
2178 const int interp_filter = features->interp_filter;
2179
2180 const MOTION_MODE motion_modes[MOTION_MODE_SEARCH_SIZE] = {
2181 SIMPLE_TRANSLATION, WARPED_CAUSAL
2182 };
2183 int mode_search_size = is_warped_mode_allowed(cpi, x, mi) ? 2 : 1;
2184
2185 WARP_SAMPLE_INFO *const warp_sample_info =
2186 &x->warp_sample_info[mi->ref_frame[0]];
2187 int *pts0 = warp_sample_info->pts;
2188 int *pts_inref0 = warp_sample_info->pts_inref;
2189
2190 const int total_samples = mi->num_proj_ref;
2191 if (total_samples == 0) {
2192 // Do not search WARPED_CAUSAL if there are no samples to use to determine
2193 // warped parameters.
2194 mode_search_size = 1;
2195 }
2196
2197 const MB_MODE_INFO base_mbmi = *mi;
2198 MB_MODE_INFO best_mbmi;
2199
2200 for (int i = 0; i < mode_search_size; ++i) {
2201 int64_t cost = INT64_MAX;
2202 MOTION_MODE motion_mode = motion_modes[i];
2203 *mi = base_mbmi;
2204 mi->motion_mode = motion_mode;
2205 if (motion_mode == SIMPLE_TRANSLATION) {
2206 mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
2207
2208 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
2209 if (use_model_yrd_large)
2210 model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
2211 &pf_rd_stats[i], this_early_term, 1, best_sse,
2212 NULL, UINT_MAX);
2213 else
2214 model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
2215 pf_rd_stats[i].rate +=
2216 av1_get_switchable_rate(x, xd, cm->features.interp_filter,
2217 cm->seq_params->enable_dual_filter);
2218 cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
2219 } else if (motion_mode == WARPED_CAUSAL) {
2220 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
2221 const ModeCosts *mode_costs = &x->mode_costs;
2222 mi->wm_params.wmtype = DEFAULT_WMTYPE;
2223 mi->interp_filters =
2224 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
2225
2226 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
2227 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
2228 // Select the samples according to motion vector difference
2229 if (mi->num_proj_ref > 1) {
2230 mi->num_proj_ref = av1_selectSamples(&mi->mv[0].as_mv, pts, pts_inref,
2231 mi->num_proj_ref, bsize);
2232 }
2233
2234 // Compute the warped motion parameters with a least squares fit
2235 // using the collected samples
2236 if (!av1_find_projection(mi->num_proj_ref, pts, pts_inref, bsize,
2237 mi->mv[0].as_mv.row, mi->mv[0].as_mv.col,
2238 &mi->wm_params, mi_row, mi_col)) {
2239 if (mi->mode == NEWMV) {
2240 const int_mv mv0 = mi->mv[0];
2241 const WarpedMotionParams wm_params0 = mi->wm_params;
2242 const int num_proj_ref0 = mi->num_proj_ref;
2243
2244 const int_mv ref_mv = av1_get_ref_mv(x, 0);
2245 SUBPEL_MOTION_SEARCH_PARAMS ms_params;
2246 av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
2247 &ref_mv.as_mv, NULL);
2248
2249 // Refine MV in a small range.
2250 av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
2251 total_samples);
2252 if (mi->mv[0].as_int == ref_mv.as_int) {
2253 continue;
2254 }
2255
2256 if (mv0.as_int != mi->mv[0].as_int) {
2257 // Keep the refined MV and WM parameters.
2258 int tmp_rate_mv = av1_mv_bit_cost(
2259 &mi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
2260 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
2261 *rate_mv = tmp_rate_mv;
2262 } else {
2263 // Restore the old MV and WM parameters.
2264 mi->mv[0] = mv0;
2265 mi->wm_params = wm_params0;
2266 mi->num_proj_ref = num_proj_ref0;
2267 }
2268 }
2269 // Build the warped predictor
2270 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
2271 av1_num_planes(cm) - 1);
2272 if (use_model_yrd_large)
2273 model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
2274 &pf_rd_stats[i], this_early_term, 1,
2275 best_sse, NULL, UINT_MAX);
2276 else
2277 model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
2278
2279 pf_rd_stats[i].rate +=
2280 mode_costs->motion_mode_cost[bsize][mi->motion_mode];
2281 cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
2282 } else {
2283 cost = INT64_MAX;
2284 }
2285 }
2286 if (cost < best_cost) {
2287 best_mode_index = i;
2288 best_cost = cost;
2289 best_skip = pf_rd_stats[i].skip_txfm;
2290 best_early_term = *this_early_term;
2291 best_mbmi = *mi;
2292 }
2293 }
2294 assert(best_mode_index >= 0 && best_mode_index < MOTION_MODE_SEARCH_SIZE);
2295
2296 *mi = best_mbmi;
2297 this_rdc->rate = pf_rd_stats[best_mode_index].rate;
2298 this_rdc->dist = pf_rd_stats[best_mode_index].dist;
2299 this_rdc->sse = pf_rd_stats[best_mode_index].sse;
2300 this_rdc->skip_txfm = (best_skip || best_early_term);
2301 *this_early_term = best_early_term;
2302 if (best_mode_index < MOTION_MODE_SEARCH_SIZE - 1) {
2303 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
2304 }
2305 }
2306 #endif // !CONFIG_REALTIME_ONLY
2307
2308 #define COLLECT_PICK_MODE_STAT 0
2309 #define COLLECT_NON_SQR_STAT 0
2310
2311 #if COLLECT_PICK_MODE_STAT
2312 #include "aom_ports/aom_timer.h"
2313 typedef struct _mode_search_stat {
2314 int32_t num_blocks[BLOCK_SIZES];
2315 int64_t total_block_times[BLOCK_SIZES];
2316 int32_t num_searches[BLOCK_SIZES][MB_MODE_COUNT];
2317 int32_t num_nonskipped_searches[BLOCK_SIZES][MB_MODE_COUNT];
2318 int64_t search_times[BLOCK_SIZES][MB_MODE_COUNT];
2319 int64_t nonskipped_search_times[BLOCK_SIZES][MB_MODE_COUNT];
2320 int64_t ms_time[BLOCK_SIZES][MB_MODE_COUNT];
2321 int64_t ifs_time[BLOCK_SIZES][MB_MODE_COUNT];
2322 int64_t model_rd_time[BLOCK_SIZES][MB_MODE_COUNT];
2323 int64_t txfm_time[BLOCK_SIZES][MB_MODE_COUNT];
2324 struct aom_usec_timer timer1;
2325 struct aom_usec_timer timer2;
2326 struct aom_usec_timer bsize_timer;
2327 } mode_search_stat;
2328
2329 static mode_search_stat ms_stat;
2330
2331 static AOM_INLINE void print_stage_time(const char *stage_name,
2332 int64_t stage_time,
2333 int64_t total_time) {
2334 printf(" %s: %ld (%f%%)\n", stage_name, stage_time,
2335 100 * stage_time / (float)total_time);
2336 }
2337
2338 static void print_time(const mode_search_stat *const ms_stat,
2339 const BLOCK_SIZE bsize, const int mi_rows,
2340 const int mi_cols, const int mi_row, const int mi_col) {
2341 if ((mi_row + mi_size_high[bsize] >= mi_rows) &&
2342 (mi_col + mi_size_wide[bsize] >= mi_cols)) {
2343 int64_t total_time = 0l;
2344 int32_t total_blocks = 0;
2345 for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
2346 total_time += ms_stat->total_block_times[bs];
2347 total_blocks += ms_stat->num_blocks[bs];
2348 }
2349
2350 printf("\n");
2351 for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
2352 if (ms_stat->num_blocks[bs] == 0) {
2353 continue;
2354 }
2355 if (!COLLECT_NON_SQR_STAT && block_size_wide[bs] != block_size_high[bs]) {
2356 continue;
2357 }
2358
2359 printf("BLOCK_%dX%d Num %d, Time: %ld (%f%%), Avg_time %f:\n",
2360 block_size_wide[bs], block_size_high[bs], ms_stat->num_blocks[bs],
2361 ms_stat->total_block_times[bs],
2362 100 * ms_stat->total_block_times[bs] / (float)total_time,
2363 (float)ms_stat->total_block_times[bs] / ms_stat->num_blocks[bs]);
2364 for (int j = 0; j < MB_MODE_COUNT; j++) {
2365 if (ms_stat->nonskipped_search_times[bs][j] == 0) {
2366 continue;
2367 }
2368
2369 int64_t total_mode_time = ms_stat->nonskipped_search_times[bs][j];
2370 printf(" Mode %d, %d/%d tps %f\n", j,
2371 ms_stat->num_nonskipped_searches[bs][j],
2372 ms_stat->num_searches[bs][j],
2373 ms_stat->num_nonskipped_searches[bs][j] > 0
2374 ? (float)ms_stat->nonskipped_search_times[bs][j] /
2375 ms_stat->num_nonskipped_searches[bs][j]
2376 : 0l);
2377 if (j >= INTER_MODE_START) {
2378 total_mode_time = ms_stat->ms_time[bs][j] + ms_stat->ifs_time[bs][j] +
2379 ms_stat->model_rd_time[bs][j] +
2380 ms_stat->txfm_time[bs][j];
2381 print_stage_time("Motion Search Time", ms_stat->ms_time[bs][j],
2382 total_time);
2383 print_stage_time("Filter Search Time", ms_stat->ifs_time[bs][j],
2384 total_time);
2385 print_stage_time("Model RD Time", ms_stat->model_rd_time[bs][j],
2386 total_time);
2387 print_stage_time("Tranfm Search Time", ms_stat->txfm_time[bs][j],
2388 total_time);
2389 }
2390 print_stage_time("Total Mode Time", total_mode_time, total_time);
2391 }
2392 printf("\n");
2393 }
2394 printf("Total time = %ld. Total blocks = %d\n", total_time, total_blocks);
2395 }
2396 }
2397 #endif // COLLECT_PICK_MODE_STAT
2398
2399 static void compute_intra_yprediction(const AV1_COMMON *cm,
2400 PREDICTION_MODE mode, BLOCK_SIZE bsize,
2401 MACROBLOCK *x, MACROBLOCKD *xd) {
2402 const SequenceHeader *seq_params = cm->seq_params;
2403 struct macroblockd_plane *const pd = &xd->plane[0];
2404 struct macroblock_plane *const p = &x->plane[0];
2405 uint8_t *const src_buf_base = p->src.buf;
2406 uint8_t *const dst_buf_base = pd->dst.buf;
2407 const int src_stride = p->src.stride;
2408 const int dst_stride = pd->dst.stride;
2409 int plane = 0;
2410 int row, col;
2411 // Block and transform sizes, in number of 4x4 blocks log 2 ("*_b"):
2412 // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8.
2413 // The transform size varies per plane; look it up in a common way.
2414 const TX_SIZE tx_size = max_txsize_lookup[bsize];
2415 const BLOCK_SIZE plane_bsize =
2416 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
2417 // If mb_to_right_edge is < 0 we are in a situation in which
2418 // the current block size extends into the UMV and we won't
2419 // visit the sub blocks that are wholly within the UMV.
2420 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
2421 const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
2422 // Keep track of the row and column of the blocks we use so that we know
2423 // if we are in the unrestricted motion border.
2424 for (row = 0; row < max_blocks_high; row += (1 << tx_size)) {
2425 // Skip visiting the sub blocks that are wholly within the UMV.
2426 for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) {
2427 p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)];
2428 pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)];
2429 av1_predict_intra_block(
2430 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter,
2431 block_size_wide[bsize], block_size_high[bsize], tx_size, mode, 0, 0,
2432 FILTER_INTRA_MODES, pd->dst.buf, dst_stride, pd->dst.buf, dst_stride,
2433 0, 0, plane);
2434 }
2435 }
2436 p->src.buf = src_buf_base;
2437 pd->dst.buf = dst_buf_base;
2438 }
2439
2440 void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
2441 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
2442 AV1_COMMON *const cm = &cpi->common;
2443 MACROBLOCKD *const xd = &x->e_mbd;
2444 MB_MODE_INFO *const mi = xd->mi[0];
2445 RD_STATS this_rdc, best_rdc;
2446 struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
2447 const TxfmSearchParams *txfm_params = &x->txfm_search_params;
2448 const TX_SIZE intra_tx_size =
2449 AOMMIN(max_txsize_lookup[bsize],
2450 tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]);
2451 int *bmode_costs;
2452 PREDICTION_MODE best_mode = DC_PRED;
2453 const MB_MODE_INFO *above_mi = xd->above_mbmi;
2454 const MB_MODE_INFO *left_mi = xd->left_mbmi;
2455 const PREDICTION_MODE A = av1_above_block_mode(above_mi);
2456 const PREDICTION_MODE L = av1_left_block_mode(left_mi);
2457 const int above_ctx = intra_mode_context[A];
2458 const int left_ctx = intra_mode_context[L];
2459 bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
2460
2461 av1_invalid_rd_stats(&best_rdc);
2462 av1_invalid_rd_stats(&this_rdc);
2463
2464 init_mbmi(mi, DC_PRED, INTRA_FRAME, NONE_FRAME, cm);
2465 mi->mv[0].as_int = mi->mv[1].as_int = INVALID_MV;
2466
2467 // Change the limit of this loop to add other intra prediction
2468 // mode tests.
2469 for (int i = 0; i < 4; ++i) {
2470 PREDICTION_MODE this_mode = intra_mode_list[i];
2471
2472 // As per the statistics generated for intra mode evaluation in the nonrd
2473 // path, the probability of H_PRED mode being the winner is found to be
2474 // very low when the best mode so far is V_PRED (out of DC_PRED and
2475 // V_PRED). If V_PRED is the winner mode out of DC_PRED and V_PRED, it
2476 // could imply the presence of a vertically dominant pattern. Hence, H_PRED
2477 // mode is not evaluated.
2478 if (cpi->sf.rt_sf.prune_h_pred_using_best_mode_so_far &&
2479 this_mode == H_PRED && best_mode == V_PRED)
2480 continue;
2481
2482 this_rdc.dist = this_rdc.rate = 0;
2483 args.mode = this_mode;
2484 args.skippable = 1;
2485 args.rdc = &this_rdc;
2486 mi->tx_size = intra_tx_size;
2487 mi->mode = this_mode;
2488 av1_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
2489 &args);
2490 const int skip_ctx = av1_get_skip_txfm_context(xd);
2491 if (args.skippable) {
2492 this_rdc.rate = x->mode_costs.skip_txfm_cost[skip_ctx][1];
2493 } else {
2494 this_rdc.rate += x->mode_costs.skip_txfm_cost[skip_ctx][0];
2495 }
2496 this_rdc.rate += bmode_costs[this_mode];
2497 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2498
2499 if (this_rdc.rdcost < best_rdc.rdcost) {
2500 best_rdc = this_rdc;
2501 best_mode = this_mode;
2502 if (!this_rdc.skip_txfm) {
2503 memset(ctx->blk_skip, 0,
2504 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
2505 }
2506 }
2507 }
2508
2509 mi->mode = best_mode;
2510 // Keep DC for UV since mode test is based on Y channel only.
2511 mi->uv_mode = UV_DC_PRED;
2512 *rd_cost = best_rdc;
2513
2514 #if CONFIG_INTERNAL_STATS
2515 store_coding_context(x, ctx, mi->mode);
2516 #else
2517 store_coding_context(x, ctx);
2518 #endif // CONFIG_INTERNAL_STATS
2519 }
2520
2521 static AOM_INLINE int is_same_gf_and_last_scale(AV1_COMMON *cm) {
2522 struct scale_factors *const sf_last = get_ref_scale_factors(cm, LAST_FRAME);
2523 struct scale_factors *const sf_golden =
2524 get_ref_scale_factors(cm, GOLDEN_FRAME);
2525 return ((sf_last->x_scale_fp == sf_golden->x_scale_fp) &&
2526 (sf_last->y_scale_fp == sf_golden->y_scale_fp));
2527 }
2528
2529 static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
2530 MB_MODE_INFO *mi, int mi_row,
2531 int mi_col, int bsize,
2532 int gf_temporal_ref,
2533 int use_ref_frame[],
2534 int *force_skip_low_temp_var) {
2535 AV1_COMMON *const cm = &cpi->common;
2536 const struct segmentation *const seg = &cm->seg;
2537 const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
2538
2539 // When the ref_frame_config is used to set the reference frame structure
2540 // then the usage of alt_ref is determined by the ref_frame_flags
2541 // (and not the speed feature use_nonrd_altref_frame).
2542 int use_alt_ref_frame = cpi->ppi->rtc_ref.set_ref_frame_config ||
2543 cpi->sf.rt_sf.use_nonrd_altref_frame;
2544
2545 int use_golden_ref_frame = 1;
2546 int use_last_ref_frame = 1;
2547
2548 // When the ref_frame_config is used to set the reference frame structure:
2549 // check if LAST is used as a reference. And only remove golden and altref
2550 // references below if last is used as a reference.
2551 if (cpi->ppi->rtc_ref.set_ref_frame_config)
2552 use_last_ref_frame =
2553 cpi->ref_frame_flags & AOM_LAST_FLAG ? use_last_ref_frame : 0;
2554
2555 // frames_since_golden is not used when the user sets the reference structure.
2556 if (!cpi->ppi->rtc_ref.set_ref_frame_config && use_last_ref_frame &&
2557 cpi->rc.frames_since_golden == 0 && gf_temporal_ref) {
2558 use_golden_ref_frame = 0;
2559 }
2560
2561 if (use_last_ref_frame && cpi->sf.rt_sf.short_circuit_low_temp_var &&
2562 x->nonrd_prune_ref_frame_search) {
2563 if (is_small_sb)
2564 *force_skip_low_temp_var = av1_get_force_skip_low_temp_var_small_sb(
2565 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
2566 else
2567 *force_skip_low_temp_var = av1_get_force_skip_low_temp_var(
2568 &x->part_search_info.variance_low[0], mi_row, mi_col, bsize);
2569 // If force_skip_low_temp_var is set, skip golden reference.
2570 if (*force_skip_low_temp_var) {
2571 use_golden_ref_frame = 0;
2572 use_alt_ref_frame = 0;
2573 }
2574 }
2575
2576 if (use_last_ref_frame &&
2577 (x->nonrd_prune_ref_frame_search > 2 || x->force_zeromv_skip_for_blk ||
2578 (x->nonrd_prune_ref_frame_search > 1 && bsize > BLOCK_64X64))) {
2579 use_golden_ref_frame = 0;
2580 use_alt_ref_frame = 0;
2581 }
2582
2583 if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
2584 get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
2585 use_golden_ref_frame = 1;
2586 use_alt_ref_frame = 0;
2587 }
2588
2589 // Skip golden reference if color is set, on flat blocks with motion.
2590 // For screen: always skip golden (if color_sensitivity_sb_g is set)
2591 // except when x->nonrd_prune_ref_frame_search = 0. This latter flag
2592 // may be set in the variance partition when golden is a much better
2593 // reference than last, in which case it may not be worth skipping
2594 // golden completely.
2595 if (((cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
2596 x->nonrd_prune_ref_frame_search != 0) ||
2597 (x->source_variance < 500 &&
2598 x->content_state_sb.source_sad_nonrd > kLowSad)) &&
2599 (x->color_sensitivity_sb_g[0] == 1 || x->color_sensitivity_sb_g[1] == 1))
2600 use_golden_ref_frame = 0;
2601
2602 // For non-screen: if golden and altref are not being selected as references
2603 // (use_golden_ref_frame/use_alt_ref_frame = 0) check to allow golden back
2604 // based on the sad of nearest/nearmv of LAST ref. If this block sad is large,
2605 // keep golden as reference. Only do this for the aggressive pruning mode and
2606 // avoid it when color is set for golden reference.
2607 if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
2608 (cpi->ref_frame_flags & AOM_LAST_FLAG) && !use_golden_ref_frame &&
2609 !use_alt_ref_frame && x->pred_mv_sad[LAST_FRAME] != INT_MAX &&
2610 x->nonrd_prune_ref_frame_search > 2 &&
2611 x->color_sensitivity_sb_g[0] == 0 && x->color_sensitivity_sb_g[1] == 0) {
2612 int thr = (cm->width * cm->height >= 640 * 360) ? 100 : 150;
2613 int pred = x->pred_mv_sad[LAST_FRAME] >>
2614 (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2615 if (pred > thr) use_golden_ref_frame = 1;
2616 }
2617
2618 use_alt_ref_frame =
2619 cpi->ref_frame_flags & AOM_ALT_FLAG ? use_alt_ref_frame : 0;
2620 use_golden_ref_frame =
2621 cpi->ref_frame_flags & AOM_GOLD_FLAG ? use_golden_ref_frame : 0;
2622
2623 // For spatial layers: enable golden ref if it is set by user and
2624 // corresponds to the lower spatial layer.
2625 if (cpi->svc.spatial_layer_id > 0 && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
2626 x->content_state_sb.source_sad_nonrd < kHighSad) {
2627 const int buffslot_golden =
2628 cpi->ppi->rtc_ref.ref_idx[GOLDEN_FRAME - LAST_FRAME];
2629 if (cpi->svc.buffer_time_index[buffslot_golden] ==
2630 cpi->svc.current_superframe)
2631 use_golden_ref_frame = 1;
2632 }
2633
2634 use_ref_frame[ALTREF_FRAME] = use_alt_ref_frame;
2635 use_ref_frame[GOLDEN_FRAME] = use_golden_ref_frame;
2636 use_ref_frame[LAST_FRAME] = use_last_ref_frame;
2637 // For now keep this assert on, but we should remove it for svc mode,
2638 // as the user may want to generate an intra-only frame (no inter-modes).
2639 // Remove this assert in subsequent CL when nonrd_pickmode is tested for the
2640 // case of intra-only frame (no references enabled).
2641 assert(use_last_ref_frame || use_golden_ref_frame || use_alt_ref_frame);
2642 }
2643
2644 // Checks whether Intra mode needs to be pruned based on
2645 // 'intra_y_mode_bsize_mask_nrd' and 'prune_hv_pred_modes_using_blksad'
2646 // speed features.
2647 static INLINE bool is_prune_intra_mode(AV1_COMP *cpi, int mode_index,
2648 int force_intra_check, BLOCK_SIZE bsize,
2649 uint8_t segment_id,
2650 SOURCE_SAD source_sad_nonrd,
2651 uint8_t color_sensitivity[2]) {
2652 const PREDICTION_MODE this_mode = intra_mode_list[mode_index];
2653 if (mode_index > 2 || force_intra_check == 0) {
2654 if (!((1 << this_mode) & cpi->sf.rt_sf.intra_y_mode_bsize_mask_nrd[bsize]))
2655 return true;
2656
2657 if (this_mode == DC_PRED) return false;
2658
2659 if (!cpi->sf.rt_sf.prune_hv_pred_modes_using_src_sad) return false;
2660
2661 const bool has_color_sensitivity =
2662 color_sensitivity[0] && color_sensitivity[1];
2663 if (has_color_sensitivity &&
2664 (cpi->rc.frame_source_sad > 1.1 * cpi->rc.avg_source_sad ||
2665 cyclic_refresh_segment_id_boosted(segment_id) ||
2666 source_sad_nonrd > kMedSad))
2667 return false;
2668
2669 return true;
2670 }
2671 return false;
2672 }
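// Editor's note (illustrative, not part of the original source): with
// force_intra_check == 1 and mode_index <= 2, is_prune_intra_mode() skips
// its early branch entirely and never prunes. With force_intra_check == 0,
// a mode must first pass intra_y_mode_bsize_mask_nrd[bsize]; DC_PRED then
// always survives, and when prune_hv_pred_modes_using_src_sad is on,
// H_PRED/V_PRED survive only if both chroma color-sensitivity flags are set
// and the block shows high source sad or a boosted segment.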
2673
2674 /*!\brief Estimates best intra mode for inter mode search
2675 *
2676 * \ingroup nonrd_mode_search
2677 * \callgraph
2678 * \callergraph
2679 *
2680 * Using heuristics based on the best inter mode, block size, and other
2681 * parameters, decides whether to check intra modes. If so, estimates and
2682 * selects the best intra mode from a reduced set (at most 4 intra modes).
2683 *
2684 * \param[in] cpi Top-level encoder structure
2685 * \param[in] x Pointer to structure holding all the
2686 * data for the current macroblock
2687 * \param[in] bsize Current block size
2688 * \param[in] best_early_term Flag, indicating that TX for the
2689 * best inter mode was skipped
2690 * \param[in] ref_cost_intra Cost of signalling intra mode
2691 * \param[in] reuse_prediction Flag, indicating prediction re-use
2692 * \param[in] orig_dst Original destination buffer
2693 * \param[in] tmp_buffers Pointer to a temporary buffers for
2694 * prediction re-use
2695 * \param[out] this_mode_pred Pointer to store prediction buffer
2696 * for prediction re-use
2697 * \param[in] best_rdc Pointer to RD cost for the best
2698 * selected intra mode
2699 * \param[in] best_pickmode Pointer to a structure containing
2700 * best mode picked so far
2701 * \param[in] ctx Pointer to structure holding coding
2702 * contexts and modes for the block
2703 *
2704 * \remark Nothing is returned. Instead, the calculated RD cost is placed in
2705 * \c best_rdc and the best selected mode is placed in \c best_pickmode.
2706 */
2707 static void estimate_intra_mode(
2708 AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int best_early_term,
2709 unsigned int ref_cost_intra, int reuse_prediction, struct buf_2d *orig_dst,
2710 PRED_BUFFER *tmp_buffers, PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
2711 BEST_PICKMODE *best_pickmode, PICK_MODE_CONTEXT *ctx) {
2712 AV1_COMMON *const cm = &cpi->common;
2713 MACROBLOCKD *const xd = &x->e_mbd;
2714 MB_MODE_INFO *const mi = xd->mi[0];
2715 const TxfmSearchParams *txfm_params = &x->txfm_search_params;
2716 const unsigned char segment_id = mi->segment_id;
2717 const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
2718 const int *const rd_thresh_freq_fact = x->thresh_freq_fact[bsize];
2719 const bool is_screen_content =
2720 cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
2721 struct macroblockd_plane *const pd = &xd->plane[0];
2722
2723 const CommonQuantParams *quant_params = &cm->quant_params;
2724
2725 RD_STATS this_rdc;
2726
2727 int intra_cost_penalty = av1_get_intra_cost_penalty(
2728 quant_params->base_qindex, quant_params->y_dc_delta_q,
2729 cm->seq_params->bit_depth);
2730 int64_t inter_mode_thresh =
2731 RDCOST(x->rdmult, ref_cost_intra + intra_cost_penalty, 0);
2732 int perform_intra_pred = cpi->sf.rt_sf.check_intra_pred_nonrd;
2733 int force_intra_check = 0;
2734 // For spatial enhancement layers: turn off intra prediction if the
2735 // previous spatial layer (used as golden ref) is not chosen as the best
2736 // reference. Only do this for temporal enhancement layers on non-key frames.
2737 if (cpi->svc.spatial_layer_id > 0 &&
2738 best_pickmode->best_ref_frame != GOLDEN_FRAME &&
2739 cpi->svc.temporal_layer_id > 0 &&
2740 !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
2741 perform_intra_pred = 0;
2742
2743 int do_early_exit_rdthresh = 1;
2744
2745 uint32_t spatial_var_thresh = 50;
2746 int motion_thresh = 32;
  // Adjust thresholds so intra mode is more likely to be tested when the
  // other references (golden, altref) are skipped/not checked. For now,
  // always adjust for SVC mode.
2750 if (cpi->ppi->use_svc || (cpi->sf.rt_sf.use_nonrd_altref_frame == 0 &&
2751 cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0)) {
2752 spatial_var_thresh = 150;
2753 motion_thresh = 0;
2754 }
2755
2756 // Some adjustments to checking intra mode based on source variance.
2757 if (x->source_variance < spatial_var_thresh) {
    // If the best inter mode has large motion or a non-LAST reference,
    // reduce the intra cost penalty so intra mode is more likely to be
    // tested.
2760 if (best_rdc->rdcost != INT64_MAX &&
2761 (best_pickmode->best_ref_frame != LAST_FRAME ||
2762 abs(mi->mv[0].as_mv.row) >= motion_thresh ||
2763 abs(mi->mv[0].as_mv.col) >= motion_thresh)) {
2764 intra_cost_penalty = intra_cost_penalty >> 2;
2765 inter_mode_thresh =
2766 RDCOST(x->rdmult, ref_cost_intra + intra_cost_penalty, 0);
2767 do_early_exit_rdthresh = 0;
2768 }
2769 if ((x->source_variance < AOMMAX(50, (spatial_var_thresh >> 1)) &&
2770 x->content_state_sb.source_sad_nonrd >= kHighSad) ||
2771 (is_screen_content && x->source_variance < 50 &&
2772 ((bsize >= BLOCK_32X32 &&
2773 x->content_state_sb.source_sad_nonrd != kZeroSad) ||
2774 x->color_sensitivity[0] == 1 || x->color_sensitivity[1] == 1)))
2775 force_intra_check = 1;
    // For big blocks it is worth checking intra (since only DC will be
    // checked), even if best_early_term is set.
2778 if (bsize >= BLOCK_32X32) best_early_term = 0;
2779 } else if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
2780 x->content_state_sb.source_sad_nonrd <= kLowSad) {
2781 perform_intra_pred = 0;
2782 }
2783
2784 if (best_rdc->skip_txfm && best_pickmode->best_mode_initial_skip_flag) {
2785 if (cpi->sf.rt_sf.skip_intra_pred == 1 && best_pickmode->best_mode != NEWMV)
2786 perform_intra_pred = 0;
2787 else if (cpi->sf.rt_sf.skip_intra_pred == 2)
2788 perform_intra_pred = 0;
2789 }
2790
2791 if (!(best_rdc->rdcost == INT64_MAX || force_intra_check ||
2792 (perform_intra_pred && !best_early_term &&
2793 bsize <= cpi->sf.part_sf.max_intra_bsize))) {
2794 return;
2795 }
2796
  // Early exit based on the RD cost computed from the known rate. When
  // is_screen_content is true, more bias is given to intra modes: a reduced
  // (more conservative) threshold is used, making this early exit less
  // likely to fire.
2800 const int64_t known_rd = is_screen_content
2801 ? CALC_BIASED_RDCOST(inter_mode_thresh)
2802 : inter_mode_thresh;
2803 if (known_rd > best_rdc->rdcost) return;
2804
2805 struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
2806 TX_SIZE intra_tx_size = AOMMIN(
2807 AOMMIN(max_txsize_lookup[bsize],
2808 tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]),
2809 TX_16X16);
2810 if (is_screen_content && cpi->rc.high_source_sad &&
2811 x->source_variance > spatial_var_thresh && bsize <= BLOCK_16X16)
2812 intra_tx_size = TX_4X4;
2813
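  // If the best inter prediction currently resides in the shared dst buffer,
  // copy it to a scratch buffer first so that the intra prediction below
  // does not clobber it.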
2814 PRED_BUFFER *const best_pred = best_pickmode->best_pred;
2815 if (reuse_prediction && best_pred != NULL) {
2816 const int bh = block_size_high[bsize];
2817 const int bw = block_size_wide[bsize];
2818 if (best_pred->data == orig_dst->buf) {
2819 *this_mode_pred = &tmp_buffers[get_pred_buffer(tmp_buffers, 3)];
2820 aom_convolve_copy(best_pred->data, best_pred->stride,
2821 (*this_mode_pred)->data, (*this_mode_pred)->stride, bw,
2822 bh);
2823 best_pickmode->best_pred = *this_mode_pred;
2824 }
2825 }
2826 pd->dst = *orig_dst;
2827
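  // Evaluate up to 4 candidate intra modes from intra_mode_list, pruning
  // candidates via speed features, source statistics, and the per-mode RD
  // thresholds below.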
2828 for (int i = 0; i < 4; ++i) {
2829 const PREDICTION_MODE this_mode = intra_mode_list[i];
2830 const THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
2831 const int64_t mode_rd_thresh = rd_threshes[mode_index];
2832
2833 if (is_prune_intra_mode(cpi, i, force_intra_check, bsize, segment_id,
2834 x->content_state_sb.source_sad_nonrd,
2835 x->color_sensitivity))
2836 continue;
2837
2838 if (is_screen_content && cpi->sf.rt_sf.source_metrics_sb_nonrd) {
2839 // For spatially flat blocks with zero motion only check
2840 // DC mode.
2841 if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
2842 x->source_variance == 0 && this_mode != DC_PRED)
2843 continue;
      // Only test intra for big blocks if the spatial variance is small.
2845 else if (bsize > BLOCK_32X32 && x->source_variance > 50)
2846 continue;
2847 }
2848
2849 if (rd_less_than_thresh(best_rdc->rdcost, mode_rd_thresh,
2850 rd_thresh_freq_fact[mode_index]) &&
2851 (do_early_exit_rdthresh || this_mode == SMOOTH_PRED)) {
2852 continue;
2853 }
2854 const BLOCK_SIZE uv_bsize = get_plane_block_size(
2855 bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
2856
2857 mi->mode = this_mode;
2858 mi->ref_frame[0] = INTRA_FRAME;
2859 mi->ref_frame[1] = NONE_FRAME;
2860
2861 av1_invalid_rd_stats(&this_rdc);
2862 args.mode = this_mode;
2863 args.skippable = 1;
2864 args.rdc = &this_rdc;
2865 mi->tx_size = intra_tx_size;
2866 compute_intra_yprediction(cm, this_mode, bsize, x, xd);
2867 // Look into selecting tx_size here, based on prediction residual.
2868 block_yrd(x, &this_rdc, &args.skippable, bsize, mi->tx_size, 0);
2869 // TODO(kyslov@) Need to account for skippable
2870 if (x->color_sensitivity[0]) {
2871 av1_foreach_transformed_block_in_plane(xd, uv_bsize, 1,
2872 estimate_block_intra, &args);
2873 }
2874 if (x->color_sensitivity[1]) {
2875 av1_foreach_transformed_block_in_plane(xd, uv_bsize, 2,
2876 estimate_block_intra, &args);
2877 }
2878
2879 int mode_cost = 0;
2880 if (av1_is_directional_mode(this_mode) && av1_use_angle_delta(bsize)) {
2881 mode_cost +=
2882 x->mode_costs.angle_delta_cost[this_mode - V_PRED]
2883 [MAX_ANGLE_DELTA +
2884 mi->angle_delta[PLANE_TYPE_Y]];
2885 }
2886 if (this_mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
2887 mode_cost += x->mode_costs.filter_intra_cost[bsize][0];
2888 }
2889 this_rdc.rate += ref_cost_intra;
2890 this_rdc.rate += intra_cost_penalty;
2891 this_rdc.rate += mode_cost;
2892 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
2893
2894 if (is_screen_content && cpi->sf.rt_sf.source_metrics_sb_nonrd) {
2895 // For blocks with low spatial variance and color sad,
2896 // favor the intra-modes, only on scene/slide change.
2897 if (cpi->rc.high_source_sad && x->source_variance < 800 &&
2898 (x->color_sensitivity[0] || x->color_sensitivity[1]))
2899 this_rdc.rdcost = CALC_BIASED_RDCOST(this_rdc.rdcost);
2900 // Otherwise bias against intra for blocks with zero
2901 // motion and no color, on non-scene/slide changes.
2902 else if (!cpi->rc.high_source_sad && x->source_variance > 0 &&
2903 x->content_state_sb.source_sad_nonrd == kZeroSad &&
2904 x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0)
2905 this_rdc.rdcost = (3 * this_rdc.rdcost) >> 1;
2906 }
2907
2908 if (this_rdc.rdcost < best_rdc->rdcost) {
2909 *best_rdc = this_rdc;
2910 best_pickmode->best_mode = this_mode;
2911 best_pickmode->best_tx_size = mi->tx_size;
2912 best_pickmode->best_ref_frame = INTRA_FRAME;
2913 best_pickmode->best_second_ref_frame = NONE;
2914 best_pickmode->best_mode_skip_txfm = this_rdc.skip_txfm;
2915 if (!this_rdc.skip_txfm) {
2916 memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
2917 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
2918 }
2919 mi->uv_mode = this_mode;
2920 mi->mv[0].as_int = INVALID_MV;
2921 mi->mv[1].as_int = INVALID_MV;
2922 }
2923 }
2924 mi->tx_size = best_pickmode->best_tx_size;
2925 }
2926
2927 static AOM_INLINE int is_filter_search_enabled_blk(
2928 AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, BLOCK_SIZE bsize,
2929 int segment_id, int cb_pred_filter_search, InterpFilter *filt_select) {
2930 const AV1_COMMON *const cm = &cpi->common;
  // Interpolation filter search disabled.
2932 if (!cpi->sf.rt_sf.use_nonrd_filter_search) return 0;
  // Filter search depends purely on mode properties.
2934 if (!cb_pred_filter_search) return 1;
2935 MACROBLOCKD *const xd = &x->e_mbd;
2936 int enable_interp_search = 0;
2937 if (!(xd->left_mbmi && xd->above_mbmi)) {
2938 // neighbors info unavailable
2939 enable_interp_search = 2;
2940 } else if (!(is_inter_block(xd->left_mbmi) &&
2941 is_inter_block(xd->above_mbmi))) {
2942 // neighbor is INTRA
2943 enable_interp_search = 2;
2944 } else if (xd->left_mbmi->interp_filters.as_int !=
2945 xd->above_mbmi->interp_filters.as_int) {
2946 // filters are different
2947 enable_interp_search = 2;
2948 } else if ((cb_pred_filter_search == 1) &&
2949 (xd->left_mbmi->interp_filters.as_filters.x_filter !=
2950 EIGHTTAP_REGULAR)) {
2951 // not regular
2952 enable_interp_search = 2;
2953 } else {
2954 // enable prediction based on chessboard pattern
2955 if (xd->left_mbmi->interp_filters.as_filters.x_filter == EIGHTTAP_SMOOTH)
2956 *filt_select = EIGHTTAP_SMOOTH;
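    // Enable the search on alternating block positions in a chessboard
    // pattern; the per-frame chessboard index flips the phase so coverage
    // alternates both spatially and temporally.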
2957 const int bsl = mi_size_wide_log2[bsize];
2958 enable_interp_search =
2959 (bool)((((mi_row + mi_col) >> bsl) +
2960 get_chessboard_index(cm->current_frame.frame_number)) &
2961 0x1);
2962 if (cyclic_refresh_segment_id_boosted(segment_id)) enable_interp_search = 1;
2963 }
2964 return enable_interp_search;
2965 }
2966
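// Skip a mode with a non-zero motion vector when the best RD cost so far is
// already below the per-mode, frequency-adapted threshold. The threshold is
// scaled up via extra_shift, for best modes that skipped txfm, and for
// non-LAST (especially stale GOLDEN) references.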
2967 static AOM_INLINE int skip_mode_by_threshold(
2968 PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, int_mv mv,
2969 int frames_since_golden, const int *const rd_threshes,
2970 const int *const rd_thresh_freq_fact, int64_t best_cost, int best_skip,
2971 int extra_shift) {
2972 int skip_this_mode = 0;
2973 const THR_MODES mode_index = mode_idx[ref_frame][INTER_OFFSET(mode)];
2974 int64_t mode_rd_thresh =
2975 best_skip ? ((int64_t)rd_threshes[mode_index]) << (extra_shift + 1)
2976 : ((int64_t)rd_threshes[mode_index]) << extra_shift;
2977
2978 // Increase mode_rd_thresh value for non-LAST for improved encoding
2979 // speed
2980 if (ref_frame != LAST_FRAME) {
2981 mode_rd_thresh = mode_rd_thresh << 1;
2982 if (ref_frame == GOLDEN_FRAME && frames_since_golden > 4)
2983 mode_rd_thresh = mode_rd_thresh << (extra_shift + 1);
2984 }
2985
2986 if (rd_less_than_thresh(best_cost, mode_rd_thresh,
2987 rd_thresh_freq_fact[mode_index]))
2988 if (mv.as_int != 0) skip_this_mode = 1;
2989
2990 return skip_this_mode;
2991 }
2992
2993 static AOM_INLINE int skip_mode_by_low_temp(
2994 PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, BLOCK_SIZE bsize,
2995 CONTENT_STATE_SB content_state_sb, int_mv mv, int force_skip_low_temp_var) {
2996 // Skip non-zeromv mode search for non-LAST frame if force_skip_low_temp_var
2997 // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
2998 // later.
2999 if (force_skip_low_temp_var && ref_frame != LAST_FRAME && mv.as_int != 0) {
3000 return 1;
3001 }
3002
3003 if (content_state_sb.source_sad_nonrd != kHighSad && bsize >= BLOCK_64X64 &&
3004 force_skip_low_temp_var && mode == NEWMV) {
3005 return 1;
3006 }
3007 return 0;
3008 }
3009
3010 static AOM_INLINE int skip_mode_by_bsize_and_ref_frame(
3011 PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, BLOCK_SIZE bsize,
3012 int extra_prune, unsigned int sse_zeromv_norm, int more_prune) {
3013 const unsigned int thresh_skip_golden = 500;
3014
3015 if (ref_frame != LAST_FRAME && sse_zeromv_norm < thresh_skip_golden &&
3016 mode == NEWMV)
3017 return 1;
3018
3019 if (bsize == BLOCK_128X128 && mode == NEWMV) return 1;
3020
3021 // Skip testing non-LAST if this flag is set.
3022 if (extra_prune) {
3023 if (extra_prune > 1 && ref_frame != LAST_FRAME &&
3024 (bsize > BLOCK_16X16 && mode == NEWMV))
3025 return 1;
3026
3027 if (ref_frame != LAST_FRAME && mode == NEARMV) return 1;
3028
3029 if (more_prune && bsize >= BLOCK_32X32 && mode == NEARMV) return 1;
3030 }
3031 return 0;
3032 }
3033
3034 static void set_color_sensitivity(AV1_COMP *cpi, MACROBLOCK *x,
3035 BLOCK_SIZE bsize, int y_sad,
3036 unsigned int source_variance,
3037 struct buf_2d yv12_mb[MAX_MB_PLANE]) {
3038 const int subsampling_x = cpi->common.seq_params->subsampling_x;
3039 const int subsampling_y = cpi->common.seq_params->subsampling_y;
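  // factor and shift control the chroma-vs-luma SAD test performed below:
  // uv_sad > factor * (y_sad >> shift); e.g. with the defaults factor = 2,
  // shift = 3 (bsize >= BLOCK_32X32), the threshold is y_sad / 4.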
3040 int factor = (bsize >= BLOCK_32X32) ? 2 : 3;
3041 int shift = 3;
3042 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
3043 cpi->rc.high_source_sad) {
3044 factor = 1;
3045 shift = 6;
3046 }
3047 NOISE_LEVEL noise_level = kLow;
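  // Normalize the luma SAD by block size (average SAD per 4x4 sub-block,
  // since b_width/height_log2_lookup are in units of 4 pixels).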
3048 int norm_sad =
3049 y_sad >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
3050 unsigned int thresh_spatial = (cpi->common.width > 1920) ? 5000 : 1000;
  // If the spatial source variance is high and the normalized y_sad is low,
  // then the y channel is likely good for mode estimation, so keep
  // color_sensitivity off. Do this only for low-noise content for now, since
  // there is some BD-rate regression for noisy color clips.
3055 if (cpi->noise_estimate.enabled)
3056 noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
3057 if (noise_level == kLow && source_variance > thresh_spatial &&
3058 cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN && norm_sad < 50) {
3059 x->color_sensitivity[0] = 0;
3060 x->color_sensitivity[1] = 0;
3061 return;
3062 }
3063 const int num_planes = av1_num_planes(&cpi->common);
3064 for (int i = 1; i < num_planes; ++i) {
3065 if (x->color_sensitivity[i - 1] == 2 || source_variance < 50) {
3066 struct macroblock_plane *const p = &x->plane[i];
3067 const BLOCK_SIZE bs =
3068 get_plane_block_size(bsize, subsampling_x, subsampling_y);
3069
3070 const int uv_sad = cpi->ppi->fn_ptr[bs].sdf(
3071 p->src.buf, p->src.stride, yv12_mb[i].buf, yv12_mb[i].stride);
3072
3073 const int norm_uv_sad =
3074 uv_sad >> (b_width_log2_lookup[bs] + b_height_log2_lookup[bs]);
3075 x->color_sensitivity[i - 1] =
3076 uv_sad > (factor * (y_sad >> shift)) && norm_uv_sad > 40;
3077 if (source_variance < 50 && norm_uv_sad > 100)
3078 x->color_sensitivity[i - 1] = 1;
3079 }
3080 }
3081 }
3082
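// Set up reference buffers and the MV stack for a compound reference pair.
// Also returns, via ref_mv_idx, the reference MV stack index used later for
// NEAR_NEARMV.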
3083 static void setup_compound_prediction(const AV1_COMMON *cm, MACROBLOCK *x,
3084 struct buf_2d yv12_mb[8][MAX_MB_PLANE],
3085 const int *use_ref_frame_mask,
3086 const MV_REFERENCE_FRAME *rf,
3087 int *ref_mv_idx) {
3088 MACROBLOCKD *const xd = &x->e_mbd;
3089 MB_MODE_INFO *const mbmi = xd->mi[0];
3090 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3091 MV_REFERENCE_FRAME ref_frame_comp;
3092 if (!use_ref_frame_mask[rf[1]]) {
3093 // Need to setup pred_block, if it hasn't been done in find_predictors.
3094 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, rf[1]);
3095 const int num_planes = av1_num_planes(cm);
3096 if (yv12 != NULL) {
3097 const struct scale_factors *const sf =
3098 get_ref_scale_factors_const(cm, rf[1]);
3099 av1_setup_pred_block(xd, yv12_mb[rf[1]], yv12, sf, sf, num_planes);
3100 }
3101 }
3102 ref_frame_comp = av1_ref_frame_type(rf);
3103 mbmi_ext->mode_context[ref_frame_comp] = 0;
3104 mbmi_ext->ref_mv_count[ref_frame_comp] = UINT8_MAX;
3105 av1_find_mv_refs(cm, xd, mbmi, ref_frame_comp, mbmi_ext->ref_mv_count,
3106 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3107 mbmi_ext->mode_context);
3108 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_comp);
3109 *ref_mv_idx = mbmi->ref_mv_idx + 1;
3110 }
3111
3112 static void set_compound_mode(MACROBLOCK *x, int ref_frame, int ref_frame2,
3113 int ref_mv_idx,
3114 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
3115 PREDICTION_MODE this_mode) {
3116 MACROBLOCKD *const xd = &x->e_mbd;
3117 MB_MODE_INFO *const mi = xd->mi[0];
3118 mi->ref_frame[0] = ref_frame;
3119 mi->ref_frame[1] = ref_frame2;
3120 mi->compound_idx = 1;
3121 mi->comp_group_idx = 0;
3122 mi->interinter_comp.type = COMPOUND_AVERAGE;
3123 MV_REFERENCE_FRAME ref_frame_comp = av1_ref_frame_type(mi->ref_frame);
3124 if (this_mode == GLOBAL_GLOBALMV) {
3125 frame_mv[this_mode][ref_frame].as_int = 0;
3126 frame_mv[this_mode][ref_frame2].as_int = 0;
3127 } else if (this_mode == NEAREST_NEARESTMV) {
3128 frame_mv[this_mode][ref_frame].as_int =
3129 xd->ref_mv_stack[ref_frame_comp][0].this_mv.as_int;
3130 frame_mv[this_mode][ref_frame2].as_int =
3131 xd->ref_mv_stack[ref_frame_comp][0].comp_mv.as_int;
3132 } else if (this_mode == NEAR_NEARMV) {
3133 frame_mv[this_mode][ref_frame].as_int =
3134 xd->ref_mv_stack[ref_frame_comp][ref_mv_idx].this_mv.as_int;
3135 frame_mv[this_mode][ref_frame2].as_int =
3136 xd->ref_mv_stack[ref_frame_comp][ref_mv_idx].comp_mv.as_int;
3137 }
3138 }
3139
// Prune compound modes if the best single-mode variance is lower than a
// fixed percentage of the median value.
3142 static bool skip_comp_based_on_var(
3143 const unsigned int (*single_vars)[REF_FRAMES], BLOCK_SIZE bsize) {
3144 unsigned int best_var = UINT_MAX;
3145 for (int cur_mode_idx = 0; cur_mode_idx < RTC_INTER_MODES; cur_mode_idx++) {
3146 for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) {
3147 best_var = AOMMIN(best_var, single_vars[cur_mode_idx][ref_idx]);
3148 }
3149 }
3150 const unsigned int thresh_64 = (unsigned int)(0.57356805f * 8659);
3151 const unsigned int thresh_32 = (unsigned int)(0.23964763f * 4281);
3152
  // Currently, the thresholds for 128 and 16 are not well-tuned. We are using
  // the results from 64 and 32 as a heuristic.
3155 switch (bsize) {
3156 case BLOCK_128X128: return best_var < 4 * thresh_64;
3157 case BLOCK_64X64: return best_var < thresh_64;
3158 case BLOCK_32X32: return best_var < thresh_32;
3159 case BLOCK_16X16: return best_var < thresh_32 / 4;
3160 default: return false;
3161 }
3162 }
3163
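// Precompute, for each reference frame used in the single-reference mode
// set, the mode-context-dependent signaling cost of each inter mode
// (NEARESTMV..NEWMV).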
3164 static AOM_FORCE_INLINE void fill_single_inter_mode_costs(
3165 int (*single_inter_mode_costs)[REF_FRAMES], const int num_inter_modes,
3166 const REF_MODE *reference_mode_set, const ModeCosts *mode_costs,
3167 const int16_t *mode_context) {
3168 bool ref_frame_used[REF_FRAMES] = { false };
3169 for (int idx = 0; idx < num_inter_modes; idx++) {
3170 ref_frame_used[reference_mode_set[idx].ref_frame] = true;
3171 }
3172
3173 for (int this_ref_frame = LAST_FRAME; this_ref_frame < REF_FRAMES;
3174 this_ref_frame++) {
3175 if (!ref_frame_used[this_ref_frame]) {
3176 continue;
3177 }
3178
3179 const MV_REFERENCE_FRAME rf[2] = { this_ref_frame, NONE_FRAME };
3180 const int16_t mode_ctx = av1_mode_context_analyzer(mode_context, rf);
3181 for (PREDICTION_MODE this_mode = NEARESTMV; this_mode <= NEWMV;
3182 this_mode++) {
3183 single_inter_mode_costs[INTER_OFFSET(this_mode)][this_ref_frame] =
3184 cost_mv_ref(mode_costs, this_mode, mode_ctx);
3185 }
3186 }
3187 }
3188
3189 static AOM_INLINE bool is_globalmv_better(
3190 PREDICTION_MODE this_mode, MV_REFERENCE_FRAME ref_frame, int rate_mv,
3191 const ModeCosts *mode_costs,
3192 const int (*single_inter_mode_costs)[REF_FRAMES],
3193 const MB_MODE_INFO_EXT *mbmi_ext) {
3194 const int globalmv_mode_cost =
3195 single_inter_mode_costs[INTER_OFFSET(GLOBALMV)][ref_frame];
3196 int this_mode_cost =
3197 rate_mv + single_inter_mode_costs[INTER_OFFSET(this_mode)][ref_frame];
3198 if (this_mode == NEWMV || this_mode == NEARMV) {
3199 const MV_REFERENCE_FRAME rf[2] = { ref_frame, NONE_FRAME };
3200 this_mode_cost += get_drl_cost(
3201 NEWMV, 0, mbmi_ext, mode_costs->drl_mode_cost0, av1_ref_frame_type(rf));
3202 }
3203 return this_mode_cost > globalmv_mode_cost;
3204 }
3205
// Set up the MVs, reference frames, etc. based on comp_index. Returns 1 on
// success, 0 on failure.
3208 static AOM_INLINE int setup_compound_params_from_comp_idx(
3209 const AV1_COMP *cpi, MACROBLOCK *x, struct buf_2d yv12_mb[8][MAX_MB_PLANE],
3210 PREDICTION_MODE *this_mode, MV_REFERENCE_FRAME *ref_frame,
3211 MV_REFERENCE_FRAME *ref_frame2, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
3212 const int *use_ref_frame_mask, int comp_index,
3213 bool comp_use_zero_zeromv_only, MV_REFERENCE_FRAME *last_comp_ref_frame) {
3214 const MV_REFERENCE_FRAME *rf = comp_ref_mode_set[comp_index].ref_frame;
3215 *this_mode = comp_ref_mode_set[comp_index].pred_mode;
3216 *ref_frame = rf[0];
3217 *ref_frame2 = rf[1];
3218 assert(*ref_frame == LAST_FRAME);
3219 assert(*this_mode == GLOBAL_GLOBALMV || *this_mode == NEAREST_NEARESTMV);
3220 if (comp_use_zero_zeromv_only && *this_mode != GLOBAL_GLOBALMV) {
3221 return 0;
3222 }
3223 if (*ref_frame2 == GOLDEN_FRAME &&
3224 (cpi->sf.rt_sf.ref_frame_comp_nonrd[0] == 0 ||
3225 !(cpi->ref_frame_flags & AOM_GOLD_FLAG))) {
3226 return 0;
3227 } else if (*ref_frame2 == LAST2_FRAME &&
3228 (cpi->sf.rt_sf.ref_frame_comp_nonrd[1] == 0 ||
3229 !(cpi->ref_frame_flags & AOM_LAST2_FLAG))) {
3230 return 0;
3231 } else if (*ref_frame2 == ALTREF_FRAME &&
3232 (cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 0 ||
3233 !(cpi->ref_frame_flags & AOM_ALT_FLAG))) {
3234 return 0;
3235 }
3236 int ref_mv_idx = 0;
3237 if (*last_comp_ref_frame != rf[1]) {
3238 // Only needs to be done once per reference pair.
3239 setup_compound_prediction(&cpi->common, x, yv12_mb, use_ref_frame_mask, rf,
3240 &ref_mv_idx);
3241 *last_comp_ref_frame = rf[1];
3242 }
3243 set_compound_mode(x, *ref_frame, *ref_frame2, ref_mv_idx, frame_mv,
3244 *this_mode);
3245 if (*this_mode != GLOBAL_GLOBALMV &&
3246 frame_mv[*this_mode][*ref_frame].as_int == 0 &&
3247 frame_mv[*this_mode][*ref_frame2].as_int == 0) {
3248 return 0;
3249 }
3250
3251 return 1;
3252 }
3253
3254 static AOM_INLINE bool previous_mode_performed_poorly(
3255 PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
3256 const unsigned int (*vars)[REF_FRAMES],
3257 const int64_t (*uv_dist)[REF_FRAMES]) {
3258 unsigned int best_var = UINT_MAX;
3259 int64_t best_uv_dist = INT64_MAX;
3260 for (int midx = 0; midx < RTC_INTER_MODES; midx++) {
3261 best_var = AOMMIN(best_var, vars[midx][ref_frame]);
3262 best_uv_dist = AOMMIN(best_uv_dist, uv_dist[midx][ref_frame]);
3263 }
3264 assert(best_var != UINT_MAX && "Invalid variance data.");
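  // A mode is considered to have performed poorly if its variance exceeds
  // the best variance for this reference by more than 12.5% (mult = 1.125).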
3265 const float mult = 1.125f;
3266 bool var_bad = mult * best_var < vars[INTER_OFFSET(mode)][ref_frame];
3267 if (uv_dist[INTER_OFFSET(mode)][ref_frame] < INT64_MAX &&
3268 best_uv_dist != uv_dist[INTER_OFFSET(mode)][ref_frame]) {
3269 // If we have chroma info, then take it into account
3270 var_bad &= mult * best_uv_dist < uv_dist[INTER_OFFSET(mode)][ref_frame];
3271 }
3272 return var_bad;
3273 }
3274
3275 static AOM_INLINE bool prune_compoundmode_with_singlemode_var(
3276 PREDICTION_MODE compound_mode, MV_REFERENCE_FRAME ref_frame,
3277 MV_REFERENCE_FRAME ref_frame2, const int_mv (*frame_mv)[REF_FRAMES],
3278 const uint8_t (*mode_checked)[REF_FRAMES],
3279 const unsigned int (*vars)[REF_FRAMES],
3280 const int64_t (*uv_dist)[REF_FRAMES]) {
3281 const PREDICTION_MODE single_mode0 = compound_ref0_mode(compound_mode);
3282 const PREDICTION_MODE single_mode1 = compound_ref1_mode(compound_mode);
3283
3284 bool first_ref_valid = false, second_ref_valid = false;
3285 bool first_ref_bad = false, second_ref_bad = false;
3286 if (mode_checked[single_mode0][ref_frame] &&
3287 frame_mv[single_mode0][ref_frame].as_int ==
3288 frame_mv[compound_mode][ref_frame].as_int &&
3289 vars[INTER_OFFSET(single_mode0)][ref_frame] < UINT_MAX) {
3290 first_ref_valid = true;
3291 first_ref_bad =
3292 previous_mode_performed_poorly(single_mode0, ref_frame, vars, uv_dist);
3293 }
3294 if (mode_checked[single_mode1][ref_frame2] &&
3295 frame_mv[single_mode1][ref_frame2].as_int ==
3296 frame_mv[compound_mode][ref_frame2].as_int &&
3297 vars[INTER_OFFSET(single_mode1)][ref_frame2] < UINT_MAX) {
3298 second_ref_valid = true;
3299 second_ref_bad =
3300 previous_mode_performed_poorly(single_mode1, ref_frame2, vars, uv_dist);
3301 }
3302 if (first_ref_valid && second_ref_valid) {
3303 return first_ref_bad && second_ref_bad;
3304 } else if (first_ref_valid || second_ref_valid) {
3305 return first_ref_bad || second_ref_bad;
3306 }
3307 return false;
3308 }
3309
3310 // Function to setup parameters used for inter mode evaluation.
3311 static AOM_FORCE_INLINE void set_params_nonrd_pick_inter_mode(
3312 AV1_COMP *cpi, MACROBLOCK *x, InterModeSearchStateNonrd *search_state,
3313 TileDataEnc *tile_data, PICK_MODE_CONTEXT *ctx, RD_STATS *rd_cost,
3314 int *force_skip_low_temp_var, int *skip_pred_mv, const int mi_row,
3315 const int mi_col, const int gf_temporal_ref, const unsigned char segment_id,
3316 BLOCK_SIZE bsize
3317 #if CONFIG_AV1_TEMPORAL_DENOISING
3318 ,
3319 int denoise_svc_pickmode
3320 #endif
3321 ) {
3322 AV1_COMMON *const cm = &cpi->common;
3323 MACROBLOCKD *const xd = &x->e_mbd;
3324 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3325 MB_MODE_INFO *const mi = xd->mi[0];
3326 const ModeCosts *mode_costs = &x->mode_costs;
3327 (void)ctx;
3328
3329 for (int idx = 0; idx < RTC_INTER_MODES; idx++) {
3330 for (int ref = 0; ref < REF_FRAMES; ref++) {
3331 search_state->vars[idx][ref] = UINT_MAX;
3332 search_state->uv_dist[idx][ref] = INT64_MAX;
3333 }
3334 }
3335
3336 x->color_sensitivity[0] = x->color_sensitivity_sb[0];
3337 x->color_sensitivity[1] = x->color_sensitivity_sb[1];
3338 init_best_pickmode(&search_state->best_pickmode);
3339
3340 estimate_single_ref_frame_costs(cm, xd, mode_costs, segment_id, bsize,
3341 search_state->ref_costs_single);
3342
3343 memset(&search_state->mode_checked[0][0], 0, MB_MODE_COUNT * REF_FRAMES);
3344
3345 txfm_info->skip_txfm = 0;
3346
3347 // initialize mode decisions
3348 av1_invalid_rd_stats(&search_state->best_rdc);
3349 av1_invalid_rd_stats(&search_state->this_rdc);
3350 av1_invalid_rd_stats(rd_cost);
3351 for (int i = 0; i < REF_FRAMES; ++i) {
3352 x->warp_sample_info[i].num = -1;
3353 }
3354
3355 mi->bsize = bsize;
3356 mi->ref_frame[0] = NONE_FRAME;
3357 mi->ref_frame[1] = NONE_FRAME;
3358
3359 #if CONFIG_AV1_TEMPORAL_DENOISING
3360 if (cpi->oxcf.noise_sensitivity > 0) {
3361 // if (cpi->ppi->use_svc) denoise_svc_pickmode =
3362 // av1_denoise_svc_non_key(cpi);
3363 if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
3364 av1_denoiser_reset_frame_stats(ctx);
3365 }
3366 #endif
3367
3368 if (cpi->ref_frame_flags & AOM_LAST_FLAG)
3369 find_predictors(cpi, x, LAST_FRAME, search_state->frame_mv, tile_data,
3370 search_state->yv12_mb, bsize, *force_skip_low_temp_var,
3371 x->force_zeromv_skip_for_blk);
3372
3373 get_ref_frame_use_mask(cpi, x, mi, mi_row, mi_col, bsize, gf_temporal_ref,
3374 search_state->use_ref_frame_mask,
3375 force_skip_low_temp_var);
3376
3377 *skip_pred_mv =
3378 x->force_zeromv_skip_for_blk ||
3379 (x->nonrd_prune_ref_frame_search > 2 && x->color_sensitivity[0] != 2 &&
3380 x->color_sensitivity[1] != 2);
3381
3382 // Start at LAST_FRAME + 1.
3383 for (MV_REFERENCE_FRAME ref_frame_iter = LAST_FRAME + 1;
3384 ref_frame_iter <= ALTREF_FRAME; ++ref_frame_iter) {
3385 if (search_state->use_ref_frame_mask[ref_frame_iter]) {
3386 find_predictors(cpi, x, ref_frame_iter, search_state->frame_mv, tile_data,
3387 search_state->yv12_mb, bsize, *force_skip_low_temp_var,
3388 *skip_pred_mv);
3389 }
3390 }
3391 }
3392
// Function to check whether the inter mode can be skipped, based on mode
// statistics and speed feature settings.
3395 static AOM_FORCE_INLINE bool skip_inter_mode_nonrd(
3396 AV1_COMP *cpi, MACROBLOCK *x, InterModeSearchStateNonrd *search_state,
3397 int64_t *thresh_sad_pred, int *force_mv_inter_layer, int *comp_pred,
3398 PREDICTION_MODE *this_mode, MV_REFERENCE_FRAME *last_comp_ref_frame,
3399 MV_REFERENCE_FRAME *ref_frame, MV_REFERENCE_FRAME *ref_frame2, int idx,
3400 int svc_mv_col, int svc_mv_row, int force_skip_low_temp_var,
3401 unsigned int sse_zeromv_norm, const int num_inter_modes,
3402 const unsigned char segment_id, BLOCK_SIZE bsize,
3403 bool comp_use_zero_zeromv_only, bool check_globalmv) {
3404 AV1_COMMON *const cm = &cpi->common;
3405 const struct segmentation *const seg = &cm->seg;
3406 const SVC *const svc = &cpi->svc;
3407 MACROBLOCKD *const xd = &x->e_mbd;
3408 MB_MODE_INFO *const mi = xd->mi[0];
3409
3410 if (idx >= num_inter_modes) {
3411 const int comp_index = idx - num_inter_modes;
3412 if (!setup_compound_params_from_comp_idx(
3413 cpi, x, search_state->yv12_mb, this_mode, ref_frame, ref_frame2,
3414 search_state->frame_mv, search_state->use_ref_frame_mask,
3415 comp_index, comp_use_zero_zeromv_only, last_comp_ref_frame)) {
3416 return true;
3417 }
3418 *comp_pred = 1;
3419 } else {
3420 *this_mode = ref_mode_set[idx].pred_mode;
3421 *ref_frame = ref_mode_set[idx].ref_frame;
3422 *ref_frame2 = NONE_FRAME;
3423 }
3424
3425 if (!*comp_pred && search_state->mode_checked[*this_mode][*ref_frame]) {
3426 return true;
3427 }
3428
3429 if (!check_globalmv && *this_mode == GLOBALMV) {
3430 return true;
3431 }
3432
3433 #if COLLECT_PICK_MODE_STAT
3434 aom_usec_timer_start(&ms_stat.timer1);
3435 ms_stat.num_searches[bsize][*this_mode]++;
3436 #endif
3437 mi->mode = *this_mode;
3438 mi->ref_frame[0] = *ref_frame;
3439 mi->ref_frame[1] = *ref_frame2;
3440
3441 if (!search_state->use_ref_frame_mask[*ref_frame]) return true;
3442
3443 if (x->force_zeromv_skip_for_blk &&
3444 ((!(*this_mode == NEARESTMV &&
3445 search_state->frame_mv[*this_mode][*ref_frame].as_int == 0) &&
3446 *this_mode != GLOBALMV) ||
3447 *ref_frame != LAST_FRAME))
3448 return true;
3449
3450 if (cpi->sf.rt_sf.prune_compoundmode_with_singlemode_var && *comp_pred &&
3451 prune_compoundmode_with_singlemode_var(
3452 *this_mode, *ref_frame, *ref_frame2, search_state->frame_mv,
3453 search_state->mode_checked, search_state->vars,
3454 search_state->uv_dist)) {
3455 return true;
3456 }
3457
3458 *force_mv_inter_layer = 0;
3459 if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
3460 ((*ref_frame == LAST_FRAME && svc->skip_mvsearch_last) ||
3461 (*ref_frame == GOLDEN_FRAME && svc->skip_mvsearch_gf) ||
3462 (*ref_frame == ALTREF_FRAME && svc->skip_mvsearch_altref))) {
3463 // Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
3464 // otherwise set NEWMV to (svc_mv_col, svc_mv_row).
3465 // Skip newmv and filter search.
3466 *force_mv_inter_layer = 1;
3467 if (*this_mode == NEWMV) {
3468 search_state->frame_mv[*this_mode][*ref_frame].as_mv.col = svc_mv_col;
3469 search_state->frame_mv[*this_mode][*ref_frame].as_mv.row = svc_mv_row;
3470 } else if (search_state->frame_mv[*this_mode][*ref_frame].as_mv.col !=
3471 svc_mv_col ||
3472 search_state->frame_mv[*this_mode][*ref_frame].as_mv.row !=
3473 svc_mv_row) {
3474 return true;
3475 }
3476 }
3477
3478 // If the segment reference frame feature is enabled then do nothing if the
3479 // current ref frame is not allowed.
3480 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3481 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)(*ref_frame))
3482 return true;
3483
3484 // For screen content: for base spatial layer only for now.
3485 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
3486 cpi->svc.spatial_layer_id == 0) {
    // If source_sad is computed: skip modes with non-zero motion for
    // stationary (super)blocks. Otherwise, if the superblock has motion,
    // skip the modes with zero motion for flat blocks when color
    // sensitivity is not set.
    // For the latter condition: the same condition should apply to NEWMV
    // if the motion vector is (0, 0), so it is repeated below after
    // search_new_mv.
3494 if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
3495 if ((search_state->frame_mv[*this_mode][*ref_frame].as_int != 0 &&
3496 x->content_state_sb.source_sad_nonrd == kZeroSad) ||
3497 (search_state->frame_mv[*this_mode][*ref_frame].as_int == 0 &&
3498 x->content_state_sb.source_sad_nonrd != kZeroSad &&
3499 ((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
3500 cpi->rc.high_source_sad) &&
3501 x->source_variance == 0))
3502 return true;
3503 }
3504 // Skip NEWMV search for flat blocks.
3505 if (*this_mode == NEWMV && x->source_variance < 100) return true;
3506 // Skip non-LAST for color on flat blocks.
3507 if (*ref_frame > LAST_FRAME && x->source_variance == 0 &&
3508 (x->color_sensitivity[0] == 1 || x->color_sensitivity[1] == 1))
3509 return true;
3510 }
3511
3512 if (skip_mode_by_bsize_and_ref_frame(
3513 *this_mode, *ref_frame, bsize, x->nonrd_prune_ref_frame_search,
3514 sse_zeromv_norm, cpi->sf.rt_sf.nonrd_aggressive_skip))
3515 return true;
3516
3517 if (skip_mode_by_low_temp(*this_mode, *ref_frame, bsize, x->content_state_sb,
3518 search_state->frame_mv[*this_mode][*ref_frame],
3519 force_skip_low_temp_var))
3520 return true;
3521
3522 // Disable this drop out case if the ref frame segment level feature is
3523 // enabled for this segment. This is to prevent the possibility that we
3524 // end up unable to pick any mode.
3525 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3526 // Check for skipping GOLDEN and ALTREF based pred_mv_sad.
3527 if (cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0 &&
3528 x->pred_mv_sad[*ref_frame] != INT_MAX && *ref_frame != LAST_FRAME) {
3529 if ((int64_t)(x->pred_mv_sad[*ref_frame]) > *thresh_sad_pred) return true;
3530 }
3531 }
3532
3533 // Check for skipping NEARMV based on pred_mv_sad.
3534 if (*this_mode == NEARMV && x->pred_mv1_sad[*ref_frame] != INT_MAX &&
3535 x->pred_mv1_sad[*ref_frame] > (x->pred_mv0_sad[*ref_frame] << 1))
3536 return true;
3537
3538 if (!*comp_pred) {
3539 if (skip_mode_by_threshold(
3540 *this_mode, *ref_frame,
3541 search_state->frame_mv[*this_mode][*ref_frame],
3542 cpi->rc.frames_since_golden, cpi->rd.threshes[segment_id][bsize],
3543 x->thresh_freq_fact[bsize], search_state->best_rdc.rdcost,
3544 search_state->best_pickmode.best_mode_skip_txfm,
3545 (cpi->sf.rt_sf.nonrd_aggressive_skip ? 1 : 0)))
3546 return true;
3547 }
3548 return false;
3549 }
3550
3551 void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
3552 MACROBLOCK *x, RD_STATS *rd_cost,
3553 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
3554 AV1_COMMON *const cm = &cpi->common;
3555 SVC *const svc = &cpi->svc;
3556 MACROBLOCKD *const xd = &x->e_mbd;
3557 MB_MODE_INFO *const mi = xd->mi[0];
3558 struct macroblockd_plane *const pd = &xd->plane[0];
3559 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
3560 const InterpFilter filter_ref = cm->features.interp_filter;
3561 const InterpFilter default_interp_filter = EIGHTTAP_REGULAR;
3562 MV_REFERENCE_FRAME ref_frame, ref_frame2;
3563 const unsigned char segment_id = mi->segment_id;
3564 int best_early_term = 0;
3565 int force_skip_low_temp_var = 0;
3566 unsigned int sse_zeromv_norm = UINT_MAX;
3567 int skip_pred_mv = 0;
3568 const int num_inter_modes = NUM_INTER_MODES;
3569 bool check_globalmv = cpi->sf.rt_sf.check_globalmv_on_single_ref;
3570 PRED_BUFFER tmp_buffer[4];
3571 DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 128 * 128]);
3572 PRED_BUFFER *this_mode_pred = NULL;
3573 const int reuse_inter_pred = cpi->sf.rt_sf.reuse_inter_pred_nonrd &&
3574 cm->seq_params->bit_depth == AOM_BITS_8;
3575 InterModeSearchStateNonrd search_state;
3576 av1_zero(search_state.use_ref_frame_mask);
3577
3578 const int bh = block_size_high[bsize];
3579 const int bw = block_size_wide[bsize];
3580 const int pixels_in_block = bh * bw;
3581 const int num_8x8_blocks = ctx->num_4x4_blk / 4;
3582 struct buf_2d orig_dst = pd->dst;
3583 const TxfmSearchParams *txfm_params = &x->txfm_search_params;
3584 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3585 #if COLLECT_PICK_MODE_STAT
3586 aom_usec_timer_start(&ms_stat.bsize_timer);
3587 #endif
3588 int64_t thresh_sad_pred = INT64_MAX;
3589 const int mi_row = xd->mi_row;
3590 const int mi_col = xd->mi_col;
3591 int svc_mv_col = 0;
3592 int svc_mv_row = 0;
3593 int force_mv_inter_layer = 0;
  bool comp_use_zero_zeromv_only = false;
3595 int tot_num_comp_modes = NUM_COMP_INTER_MODES_RT;
3596 #if CONFIG_AV1_TEMPORAL_DENOISING
3597 const int denoise_recheck_zeromv = 1;
3598 AV1_PICKMODE_CTX_DEN ctx_den;
3599 int64_t zero_last_cost_orig = INT64_MAX;
3600 int denoise_svc_pickmode = 1;
3601 const int resize_pending = is_frame_resize_pending(cpi);
3602 #endif
3603 const ModeCosts *mode_costs = &x->mode_costs;
3604
3605 if (reuse_inter_pred) {
3606 for (int i = 0; i < 3; i++) {
3607 tmp_buffer[i].data = &pred_buf[pixels_in_block * i];
3608 tmp_buffer[i].stride = bw;
3609 tmp_buffer[i].in_use = 0;
3610 }
3611 tmp_buffer[3].data = pd->dst.buf;
3612 tmp_buffer[3].stride = pd->dst.stride;
3613 tmp_buffer[3].in_use = 0;
3614 }
3615
3616 const int gf_temporal_ref = is_same_gf_and_last_scale(cm);
3617
3618 // If the lower spatial layer uses an averaging filter for downsampling
3619 // (phase = 8), the target decimated pixel is shifted by (1/2, 1/2) relative
3620 // to source, so use subpel motion vector to compensate. The nonzero motion
3621 // is half pixel shifted to left and top, so (-4, -4). This has more effect
3622 // on higher resolutions, so condition it on that for now.
3623 if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
3624 svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
3625 cm->width * cm->height > 640 * 480) {
3626 svc_mv_col = -4;
3627 svc_mv_row = -4;
3628 }
3629
3630 // Setup parameters used for inter mode evaluation.
3631 set_params_nonrd_pick_inter_mode(
3632 cpi, x, &search_state, tile_data, ctx, rd_cost, &force_skip_low_temp_var,
3633 &skip_pred_mv, mi_row, mi_col, gf_temporal_ref, segment_id, bsize
3634 #if CONFIG_AV1_TEMPORAL_DENOISING
3635 ,
3636 denoise_svc_pickmode
3637 #endif
3638 );
3639
3640 if (cpi->sf.rt_sf.use_comp_ref_nonrd && is_comp_ref_allowed(bsize)) {
    // Only search compound modes if bsize > BLOCK_16X16.
3642 if (bsize > BLOCK_16X16) {
3643 comp_use_zero_zeromv_only =
3644 cpi->sf.rt_sf.check_only_zero_zeromv_on_large_blocks;
3645 } else {
3646 tot_num_comp_modes = 0;
3647 }
3648 } else {
3649 tot_num_comp_modes = 0;
3650 }
3651
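  // Prune GOLDEN/ALTREF candidates whose pred MV SAD exceeds roughly 2x
  // (2.25x at pruning level 1) the pred MV SAD of LAST_FRAME.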
3652 if (x->pred_mv_sad[LAST_FRAME] != INT_MAX) {
3653 thresh_sad_pred = ((int64_t)x->pred_mv_sad[LAST_FRAME]) << 1;
3654 // Increase threshold for less aggressive pruning.
3655 if (cpi->sf.rt_sf.nonrd_prune_ref_frame_search == 1)
3656 thresh_sad_pred += (x->pred_mv_sad[LAST_FRAME] >> 2);
3657 }
3658
3659 const int use_model_yrd_large = get_model_rd_flag(cpi, xd, bsize);
3660
  // Decide block-level interpolation filter search flags:
  // filter_search_enabled_blk:
  //   0: disabled
  //   1: filter search depends on mode properties
  //   2: filter search forced since prediction is unreliable
  // cb_pred_filter_search == 0: chessboard-based filter prediction disabled.
3667 InterpFilter filt_select = EIGHTTAP_REGULAR;
3668 const int cb_pred_filter_search =
3669 x->content_state_sb.source_sad_nonrd > kVeryLowSad
3670 ? cpi->sf.interp_sf.cb_pred_filter_search
3671 : 0;
3672 const int filter_search_enabled_blk =
3673 is_filter_search_enabled_blk(cpi, x, mi_row, mi_col, bsize, segment_id,
3674 cb_pred_filter_search, &filt_select);
3675
3676 #if COLLECT_PICK_MODE_STAT
3677 ms_stat.num_blocks[bsize]++;
3678 #endif
3679 init_mbmi(mi, DC_PRED, NONE_FRAME, NONE_FRAME, cm);
3680 mi->tx_size = AOMMIN(
3681 AOMMIN(max_txsize_lookup[bsize],
3682 tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]),
3683 TX_16X16);
3684
3685 fill_single_inter_mode_costs(search_state.single_inter_mode_costs,
3686 num_inter_modes, ref_mode_set, mode_costs,
3687 mbmi_ext->mode_context);
3688
3689 MV_REFERENCE_FRAME last_comp_ref_frame = NONE_FRAME;
3690
3691 // Initialize inter prediction params at block level for single reference
3692 // mode.
3693 InterPredParams inter_pred_params_sr;
3694 init_inter_block_params(&inter_pred_params_sr, pd->width, pd->height,
3695 mi_row * MI_SIZE, mi_col * MI_SIZE, pd->subsampling_x,
3696 pd->subsampling_y, xd->bd, is_cur_buf_hbd(xd),
3697 /*is_intrabc=*/0);
3698 inter_pred_params_sr.conv_params =
3699 get_conv_params(/*do_average=*/0, AOM_PLANE_Y, xd->bd);
3700
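  // Main mode-evaluation loop: single-reference modes first, followed by
  // the compound modes (when tot_num_comp_modes > 0).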
3701 for (int idx = 0; idx < num_inter_modes + tot_num_comp_modes; ++idx) {
3702 // If we are at the first compound mode, and the single modes already
3703 // perform well, then end the search.
3704 if (cpi->sf.rt_sf.skip_compound_based_on_var && idx == num_inter_modes &&
3705 skip_comp_based_on_var(search_state.vars, bsize)) {
3706 break;
3707 }
3708
3709 int rate_mv = 0;
3710 int is_skippable;
3711 int this_early_term = 0;
3712 int skip_this_mv = 0;
3713 int comp_pred = 0;
3714 unsigned int var = UINT_MAX;
3715 PREDICTION_MODE this_mode;
3716 RD_STATS nonskip_rdc;
3717 av1_invalid_rd_stats(&nonskip_rdc);
3718 memset(txfm_info->blk_skip, 0,
3719 sizeof(txfm_info->blk_skip[0]) * num_8x8_blocks);
3720
3721 // Check the inter mode can be skipped based on mode statistics and speed
3722 // features settings.
3723 if (skip_inter_mode_nonrd(
3724 cpi, x, &search_state, &thresh_sad_pred, &force_mv_inter_layer,
3725 &comp_pred, &this_mode, &last_comp_ref_frame, &ref_frame,
3726 &ref_frame2, idx, svc_mv_col, svc_mv_row, force_skip_low_temp_var,
3727 sse_zeromv_norm, num_inter_modes, segment_id, bsize,
3728 comp_use_zero_zeromv_only, check_globalmv))
3729 continue;
3730
3731 // Select prediction reference frames.
3732 for (int i = 0; i < MAX_MB_PLANE; i++) {
3733 xd->plane[i].pre[0] = search_state.yv12_mb[ref_frame][i];
3734 if (comp_pred) xd->plane[i].pre[1] = search_state.yv12_mb[ref_frame2][i];
3735 }
3736
3737 mi->ref_frame[0] = ref_frame;
3738 mi->ref_frame[1] = ref_frame2;
3739 set_ref_ptrs(cm, xd, ref_frame, ref_frame2);
3740
3741 if (this_mode == NEWMV && !force_mv_inter_layer) {
3742 #if COLLECT_PICK_MODE_STAT
3743 aom_usec_timer_start(&ms_stat.timer2);
3744 #endif
3745 const bool skip_newmv = search_new_mv(
3746 cpi, x, search_state.frame_mv, ref_frame, gf_temporal_ref, bsize,
3747 mi_row, mi_col, &rate_mv, &search_state.best_rdc);
3748 #if COLLECT_PICK_MODE_STAT
3749 aom_usec_timer_mark(&ms_stat.timer2);
3750 ms_stat.ms_time[bsize][this_mode] +=
3751 aom_usec_timer_elapsed(&ms_stat.timer2);
3752 #endif
3753 if (skip_newmv) {
3754 continue;
3755 }
3756 }
3757
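    // Skip this mode if an already-checked single-reference mode produced
    // an identical motion vector for the same reference frame.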
3758 for (PREDICTION_MODE inter_mv_mode = NEARESTMV; inter_mv_mode <= NEWMV;
3759 inter_mv_mode++) {
3760 if (inter_mv_mode == this_mode) continue;
3761 if (!comp_pred && search_state.mode_checked[inter_mv_mode][ref_frame] &&
3762 search_state.frame_mv[this_mode][ref_frame].as_int ==
3763 search_state.frame_mv[inter_mv_mode][ref_frame].as_int) {
3764 skip_this_mv = 1;
3765 break;
3766 }
3767 }
3768
3769 if (skip_this_mv && !comp_pred) continue;
3770
3771 // For screen: for spatially flat blocks with non-zero motion,
3772 // skip newmv if the motion vector is (0, 0), and color is not set.
3773 if (this_mode == NEWMV &&
3774 cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
3775 cpi->svc.spatial_layer_id == 0 &&
3776 cpi->sf.rt_sf.source_metrics_sb_nonrd) {
3777 if (search_state.frame_mv[this_mode][ref_frame].as_int == 0 &&
3778 x->content_state_sb.source_sad_nonrd != kZeroSad &&
3779 ((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
3780 cpi->rc.high_source_sad) &&
3781 x->source_variance == 0)
3782 continue;
3783 }
3784
3785 mi->mode = this_mode;
3786 mi->mv[0].as_int = search_state.frame_mv[this_mode][ref_frame].as_int;
3787 mi->mv[1].as_int = 0;
3788 if (comp_pred)
3789 mi->mv[1].as_int = search_state.frame_mv[this_mode][ref_frame2].as_int;
3790
3791 if (reuse_inter_pred) {
3792 if (!this_mode_pred) {
3793 this_mode_pred = &tmp_buffer[3];
3794 } else {
3795 this_mode_pred = &tmp_buffer[get_pred_buffer(tmp_buffer, 3)];
3796 pd->dst.buf = this_mode_pred->data;
3797 pd->dst.stride = bw;
3798 }
3799 }
3800
3801 if (idx == 0 && !skip_pred_mv) {
3802 // Set color sensitivity on first tested mode only.
3803 // Use y-sad already computed in find_predictors: take the sad with motion
3804 // vector closest to 0; the uv-sad computed below in set_color_sensitivity
3805 // is for zeromv.
3806 // For screen: first check if golden reference is being used, if so,
3807 // force color_sensitivity on if the color sensitivity for sb_g is on.
3808 if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
3809 search_state.use_ref_frame_mask[GOLDEN_FRAME]) {
3810 if (x->color_sensitivity_sb_g[0] == 1) x->color_sensitivity[0] = 1;
3811 if (x->color_sensitivity_sb_g[1] == 1) x->color_sensitivity[1] = 1;
3812 } else {
3813 int y_sad = x->pred_mv0_sad[LAST_FRAME];
3814 if (x->pred_mv1_sad[LAST_FRAME] != INT_MAX &&
3815 (abs(search_state.frame_mv[NEARMV][LAST_FRAME].as_mv.col) +
3816 abs(search_state.frame_mv[NEARMV][LAST_FRAME].as_mv.row)) <
3817 (abs(search_state.frame_mv[NEARESTMV][LAST_FRAME].as_mv.col) +
3818 abs(search_state.frame_mv[NEARESTMV][LAST_FRAME].as_mv.row)))
3819 y_sad = x->pred_mv1_sad[LAST_FRAME];
3820 set_color_sensitivity(cpi, x, bsize, y_sad, x->source_variance,
3821 search_state.yv12_mb[LAST_FRAME]);
3822 }
3823 }
3824 mi->motion_mode = SIMPLE_TRANSLATION;
3825 #if !CONFIG_REALTIME_ONLY
3826 if (cpi->oxcf.motion_mode_cfg.allow_warped_motion) {
3827 calc_num_proj_ref(cpi, x, mi);
3828 }
3829 #endif
    // Set variance threshold for compound mode pruning.
3831 unsigned int var_threshold = UINT_MAX;
3832 if (cpi->sf.rt_sf.prune_compoundmode_with_singlecompound_var && comp_pred &&
3833 use_model_yrd_large) {
3834 const PREDICTION_MODE single_mode0 = compound_ref0_mode(this_mode);
3835 const PREDICTION_MODE single_mode1 = compound_ref1_mode(this_mode);
3836 var_threshold =
3837 AOMMIN(var_threshold,
3838 search_state.vars[INTER_OFFSET(single_mode0)][ref_frame]);
3839 var_threshold =
3840 AOMMIN(var_threshold,
3841 search_state.vars[INTER_OFFSET(single_mode1)][ref_frame2]);
3842 }
    // Decide the interpolation filter, build the prediction signal, and
    // compute the SSE.
3844 const bool is_mv_subpel =
3845 (mi->mv[0].as_mv.row & 0x07) || (mi->mv[0].as_mv.col & 0x07);
3846 const bool enable_filt_search_this_mode =
3847 (filter_search_enabled_blk == 2)
3848 ? true
3849 : (filter_search_enabled_blk && !force_mv_inter_layer &&
3850 !comp_pred &&
3851 (ref_frame == LAST_FRAME || !x->nonrd_prune_ref_frame_search));
3852 if (is_mv_subpel && enable_filt_search_this_mode) {
3853 #if COLLECT_PICK_MODE_STAT
3854 aom_usec_timer_start(&ms_stat.timer2);
3855 #endif
3856 search_filter_ref(cpi, x, &search_state.this_rdc, &inter_pred_params_sr,
3857 mi_row, mi_col, tmp_buffer, bsize, reuse_inter_pred,
3858 &this_mode_pred, &this_early_term, &var,
3859 use_model_yrd_large,
3860 search_state.best_pickmode.best_sse, comp_pred);
3861 #if COLLECT_PICK_MODE_STAT
3862 aom_usec_timer_mark(&ms_stat.timer2);
3863 ms_stat.ifs_time[bsize][this_mode] +=
3864 aom_usec_timer_elapsed(&ms_stat.timer2);
3865 #endif
3866 #if !CONFIG_REALTIME_ONLY
3867 } else if (cpi->oxcf.motion_mode_cfg.allow_warped_motion &&
3868 this_mode == NEWMV) {
3869 search_motion_mode(cpi, x, &search_state.this_rdc, mi_row, mi_col, bsize,
3870 &this_early_term, use_model_yrd_large, &rate_mv,
3871 search_state.best_pickmode.best_sse);
3872 if (this_mode == NEWMV) {
3873 search_state.frame_mv[this_mode][ref_frame] = mi->mv[0];
3874 }
3875 #endif
3876 } else {
3877 mi->interp_filters =
3878 (filter_ref == SWITCHABLE)
3879 ? av1_broadcast_interp_filter(default_interp_filter)
3880 : av1_broadcast_interp_filter(filter_ref);
3881 if (force_mv_inter_layer)
3882 mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
3883
3884 // If it is sub-pel motion and cb_pred_filter_search is enabled, select
3885 // the pre-decided filter
3886 if (is_mv_subpel && cb_pred_filter_search)
3887 mi->interp_filters = av1_broadcast_interp_filter(filt_select);
3888
3889 #if COLLECT_PICK_MODE_STAT
3890 aom_usec_timer_start(&ms_stat.timer2);
3891 #endif
3892 if (!comp_pred) {
3893 SubpelParams subpel_params;
3894 // Initialize inter mode level params for single reference mode.
3895 init_inter_mode_params(&mi->mv[0].as_mv, &inter_pred_params_sr,
3896 &subpel_params, xd->block_ref_scale_factors[0],
3897 pd->pre->width, pd->pre->height);
3898 av1_enc_build_inter_predictor_y_nonrd(xd, &inter_pred_params_sr,
3899 &subpel_params);
3900 } else {
3901 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3902 0);
3903 }
3904
3905 if (use_model_yrd_large) {
3906 model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
3907 &search_state.this_rdc, &this_early_term, 0,
3908 search_state.best_pickmode.best_sse, &var,
3909 var_threshold);
3910 } else {
3911 model_rd_for_sb_y(cpi, bsize, x, xd, &search_state.this_rdc, &var, 0,
3912 &this_early_term);
3913 }
3914 #if COLLECT_PICK_MODE_STAT
3915 aom_usec_timer_mark(&ms_stat.timer2);
3916 ms_stat.model_rd_time[bsize][this_mode] +=
3917 aom_usec_timer_elapsed(&ms_stat.timer2);
3918 #endif
3919 }
3920 // update variance for single mode
3921 if (!comp_pred) {
3922 search_state.vars[INTER_OFFSET(this_mode)][ref_frame] = var;
3923 if (search_state.frame_mv[this_mode][ref_frame].as_int == 0) {
3924 search_state.vars[INTER_OFFSET(GLOBALMV)][ref_frame] = var;
3925 }
3926 }
3927 // prune compound mode based on single mode var threshold
3928 if (comp_pred && var > var_threshold) {
3929 if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
3930 continue;
3931 }
3932
3933 if (ref_frame == LAST_FRAME &&
3934 search_state.frame_mv[this_mode][ref_frame].as_int == 0) {
3935 sse_zeromv_norm = (unsigned int)(search_state.this_rdc.sse >>
3936 (b_width_log2_lookup[bsize] +
3937 b_height_log2_lookup[bsize]));
3938 }
3939
3940 if (cpi->sf.rt_sf.sse_early_term_inter_search &&
3941 early_term_inter_search_with_sse(
3942 cpi->sf.rt_sf.sse_early_term_inter_search, bsize,
3943 search_state.this_rdc.sse, search_state.best_pickmode.best_sse,
3944 this_mode)) {
3945 if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
3946 continue;
3947 }
3948
3949 #if COLLECT_PICK_MODE_STAT
3950 ms_stat.num_nonskipped_searches[bsize][this_mode]++;
3951 #endif
3952
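    // Rate cost of signaling skip vs. no-skip of the transform coefficients
    // for this block's skip context.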
3953 const int skip_ctx = av1_get_skip_txfm_context(xd);
3954 const int skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][1];
3955 const int no_skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][0];
3956 const int64_t sse_y = search_state.this_rdc.sse;
3957 if (this_early_term) {
3958 search_state.this_rdc.skip_txfm = 1;
3959 search_state.this_rdc.rate = skip_txfm_cost;
3960 search_state.this_rdc.dist = search_state.this_rdc.sse << 4;
3961 } else {
3962 #if COLLECT_PICK_MODE_STAT
3963 aom_usec_timer_start(&ms_stat.timer2);
3964 #endif
3965 block_yrd(x, &search_state.this_rdc, &is_skippable, bsize, mi->tx_size,
3966 1);
3967 if (search_state.this_rdc.skip_txfm ||
3968 RDCOST(x->rdmult, search_state.this_rdc.rate,
3969 search_state.this_rdc.dist) >=
3970 RDCOST(x->rdmult, 0, search_state.this_rdc.sse)) {
3971 if (!search_state.this_rdc.skip_txfm) {
3972 // Need to store "real" rdc for possible future use if UV rdc
3973 // disallows tx skip
3974 nonskip_rdc = search_state.this_rdc;
3975 nonskip_rdc.rate += no_skip_txfm_cost;
3976 }
3977 search_state.this_rdc.rate = skip_txfm_cost;
3978 search_state.this_rdc.skip_txfm = 1;
3979 search_state.this_rdc.dist = search_state.this_rdc.sse;
3980 } else {
3981 search_state.this_rdc.rate += no_skip_txfm_cost;
3982 }
3983 if ((x->color_sensitivity[0] || x->color_sensitivity[1])) {
3984 RD_STATS rdc_uv;
3985 const BLOCK_SIZE uv_bsize = get_plane_block_size(
3986 bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
3987 if (x->color_sensitivity[0]) {
3988 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
3989 AOM_PLANE_U, AOM_PLANE_U);
3990 }
3991 if (x->color_sensitivity[1]) {
3992 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
3993 AOM_PLANE_V, AOM_PLANE_V);
3994 }
3995 const int64_t sse_uv =
3996 model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, 1, 2);
3997 search_state.this_rdc.sse += sse_uv;
3998 // Restore Y rdc if UV rdc disallows txfm skip
3999 if (search_state.this_rdc.skip_txfm && !rdc_uv.skip_txfm &&
4000 nonskip_rdc.rate != INT_MAX)
4001 search_state.this_rdc = nonskip_rdc;
4002 if (!comp_pred) {
4003 search_state.uv_dist[INTER_OFFSET(this_mode)][ref_frame] =
4004 rdc_uv.dist;
4005 }
4006 search_state.this_rdc.rate += rdc_uv.rate;
4007 search_state.this_rdc.dist += rdc_uv.dist;
4008 search_state.this_rdc.skip_txfm =
4009 search_state.this_rdc.skip_txfm && rdc_uv.skip_txfm;
4010 }
4011 #if COLLECT_PICK_MODE_STAT
4012 aom_usec_timer_mark(&ms_stat.timer2);
4013 ms_stat.txfm_time[bsize][this_mode] +=
4014 aom_usec_timer_elapsed(&ms_stat.timer2);
4015 #endif
4016 }
4017 PREDICTION_MODE this_best_mode = this_mode;
4018
4019 // TODO(kyslov) account for UV prediction cost
4020 search_state.this_rdc.rate += rate_mv;
4021 if (comp_pred) {
4022 const int16_t mode_ctx =
4023 av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
4024 search_state.this_rdc.rate +=
4025 cost_mv_ref(mode_costs, this_mode, mode_ctx);
4026 } else {
4027 // If the current mode has zeromv but is not GLOBALMV, compare the rate
4028 // cost. If GLOBALMV is cheaper, use GLOBALMV instead.
4029 if (this_mode != GLOBALMV &&
4030 search_state.frame_mv[this_mode][ref_frame].as_int ==
4031 search_state.frame_mv[GLOBALMV][ref_frame].as_int) {
4032 if (is_globalmv_better(this_mode, ref_frame, rate_mv, mode_costs,
4033 search_state.single_inter_mode_costs,
4034 mbmi_ext)) {
4035 this_best_mode = GLOBALMV;
4036 }
4037 }
4038
4039 search_state.this_rdc.rate +=
4040 search_state
4041 .single_inter_mode_costs[INTER_OFFSET(this_best_mode)][ref_frame];
4042 }
4043
4044 if (!comp_pred && search_state.frame_mv[this_mode][ref_frame].as_int == 0 &&
4045 var < UINT_MAX) {
4046 search_state.vars[INTER_OFFSET(GLOBALMV)][ref_frame] = var;
4047 }
4048
4049 search_state.this_rdc.rate += search_state.ref_costs_single[ref_frame];
4050
4051 search_state.this_rdc.rdcost = RDCOST(x->rdmult, search_state.this_rdc.rate,
4052 search_state.this_rdc.dist);
4053 if (cpi->oxcf.rc_cfg.mode == AOM_CBR && !comp_pred) {
4054 newmv_diff_bias(
4055 xd, this_best_mode, &search_state.this_rdc, bsize,
4056 search_state.frame_mv[this_best_mode][ref_frame].as_mv.row,
4057 search_state.frame_mv[this_best_mode][ref_frame].as_mv.col,
4058 cpi->speed, x->source_variance, x->content_state_sb);
4059 }
4060 #if CONFIG_AV1_TEMPORAL_DENOISING
4061 if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode &&
4062 cpi->denoiser.denoising_level > kDenLowLow) {
4063 av1_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
4064 // Keep track of zero_last cost.
4065 if (ref_frame == LAST_FRAME &&
4066 search_state.frame_mv[this_mode][ref_frame].as_int == 0)
4067 zero_last_cost_orig = search_state.this_rdc.rdcost;
4068 }
4069 #else
4070 (void)sse_y;
4071 #endif
4072
4073 search_state.mode_checked[this_mode][ref_frame] = 1;
4074 search_state.mode_checked[this_best_mode][ref_frame] = 1;
4075
4076 if (check_globalmv) {
4077 int32_t abs_mv =
4078 abs(search_state.frame_mv[this_best_mode][ref_frame].as_mv.row) +
4079 abs(search_state.frame_mv[this_best_mode][ref_frame].as_mv.col);
      // Early exit check: if the magnitude of this_best_mode's mv is small
      // enough, skip the GLOBALMV check in the remaining loop iterations.
4082 if (abs_mv < 2) {
4083 check_globalmv = false;
4084 }
4085 }
4086 #if COLLECT_PICK_MODE_STAT
4087 aom_usec_timer_mark(&ms_stat.timer1);
4088 ms_stat.nonskipped_search_times[bsize][this_mode] +=
4089 aom_usec_timer_elapsed(&ms_stat.timer1);
4090 #endif
    if (search_state.this_rdc.rdcost < search_state.best_rdc.rdcost) {
      search_state.best_rdc = search_state.this_rdc;
      best_early_term = this_early_term;
      search_state.best_pickmode.best_sse = sse_y;
      search_state.best_pickmode.best_mode = this_best_mode;
      search_state.best_pickmode.best_motion_mode = mi->motion_mode;
      search_state.best_pickmode.wm_params = mi->wm_params;
      search_state.best_pickmode.num_proj_ref = mi->num_proj_ref;
      search_state.best_pickmode.best_pred_filter = mi->interp_filters;
      search_state.best_pickmode.best_tx_size = mi->tx_size;
      search_state.best_pickmode.best_ref_frame = ref_frame;
      search_state.best_pickmode.best_second_ref_frame = ref_frame2;
      search_state.best_pickmode.best_mode_skip_txfm =
          search_state.this_rdc.skip_txfm;
      search_state.best_pickmode.best_mode_initial_skip_flag =
          (nonskip_rdc.rate == INT_MAX && search_state.this_rdc.skip_txfm);
      if (!search_state.best_pickmode.best_mode_skip_txfm) {
        memcpy(search_state.best_pickmode.blk_skip, txfm_info->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * num_8x8_blocks);
      }

      // Save the best mode's motion vectors; they are read back from
      // frame_mv_best when the final mode is committed, which is needed in
      // particular for the compound modes.
      search_state.frame_mv_best[this_best_mode][ref_frame].as_int =
          search_state.frame_mv[this_best_mode][ref_frame].as_int;
      if (ref_frame2 > NONE_FRAME) {
        search_state.frame_mv_best[this_best_mode][ref_frame2].as_int =
            search_state.frame_mv[this_best_mode][ref_frame2].as_int;
      }

      if (reuse_inter_pred) {
        free_pred_buffer(search_state.best_pickmode.best_pred);
        search_state.best_pickmode.best_pred = this_mode_pred;
      }
    } else {
      if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
    }
    if (best_early_term && (idx > 0 || cpi->sf.rt_sf.nonrd_aggressive_skip)) {
      txfm_info->skip_txfm = 1;
      break;
    }
  }

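  // The mode loop is done: commit the winning mode's prediction parameters
  // (mode, motion mode, filters, tx size, reference frames and motion
  // vectors) to the block's mode info.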
  mi->mode = search_state.best_pickmode.best_mode;
  mi->motion_mode = search_state.best_pickmode.best_motion_mode;
  mi->wm_params = search_state.best_pickmode.wm_params;
  mi->num_proj_ref = search_state.best_pickmode.num_proj_ref;
  mi->interp_filters = search_state.best_pickmode.best_pred_filter;
  mi->tx_size = search_state.best_pickmode.best_tx_size;
  memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
  mi->ref_frame[0] = search_state.best_pickmode.best_ref_frame;
  mi->mv[0].as_int =
      search_state
          .frame_mv_best[search_state.best_pickmode.best_mode]
                        [search_state.best_pickmode.best_ref_frame]
          .as_int;
  mi->mv[1].as_int = 0;
  if (search_state.best_pickmode.best_second_ref_frame > INTRA_FRAME) {
    mi->ref_frame[1] = search_state.best_pickmode.best_second_ref_frame;
    mi->mv[1].as_int =
        search_state
            .frame_mv_best[search_state.best_pickmode.best_mode]
                          [search_state.best_pickmode.best_second_ref_frame]
            .as_int;
  }
  // Perform an intra prediction search if the best SAD is above a certain
  // threshold; reset the intra-specific fields first.
  mi->angle_delta[PLANE_TYPE_Y] = 0;
  mi->angle_delta[PLANE_TYPE_UV] = 0;
  mi->filter_intra_mode_info.use_filter_intra = 0;

#if COLLECT_PICK_MODE_STAT
  aom_usec_timer_start(&ms_stat.timer1);
  ms_stat.num_searches[bsize][DC_PRED]++;
  ms_stat.num_nonskipped_searches[bsize][DC_PRED]++;
#endif

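  // Unless the block is forced down the zero-mv skip path, evaluate intra
  // modes as a possible replacement for the best inter mode found above.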
  if (!x->force_zeromv_skip_for_blk)
    estimate_intra_mode(cpi, x, bsize, best_early_term,
                        search_state.ref_costs_single[INTRA_FRAME],
                        reuse_inter_pred, &orig_dst, tmp_buffer,
                        &this_mode_pred, &search_state.best_rdc,
                        &search_state.best_pickmode, ctx);

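  // Both the IDTX and the palette checks below cost only the luma plane, so
  // they are skipped when chroma sensitivity is flagged for a moving block
  // (nonzero source SAD) outside of high-motion frames.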
  const int skip_idtx_palette =
      (x->color_sensitivity[0] || x->color_sensitivity[1]) &&
      x->content_state_sb.source_sad_nonrd != kZeroSad &&
      !cpi->rc.high_source_sad;

  // Check for IDTX: the rd cost is based only on the Y channel, so avoid it
  // when color_sensitivity is set.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && !skip_idtx_palette &&
      !cpi->oxcf.txfm_cfg.use_inter_dct_only && !x->force_zeromv_skip_for_blk &&
      is_inter_mode(search_state.best_pickmode.best_mode) &&
      (!cpi->sf.rt_sf.prune_idtx_nonrd ||
       (bsize <= BLOCK_32X32 &&
        search_state.best_pickmode.best_mode_skip_txfm != 1 &&
        x->source_variance > 200))) {
    RD_STATS idtx_rdc;
    av1_init_rd_stats(&idtx_rdc);
    int is_skippable;
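    // Rebuild the inter predictor of the current best mode into a scratch
    // buffer and measure its Y rd cost under the identity transform.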
    this_mode_pred = &tmp_buffer[get_pred_buffer(tmp_buffer, 3)];
    pd->dst.buf = this_mode_pred->data;
    pd->dst.stride = bw;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
    block_yrd_idtx(x, &idtx_rdc, &is_skippable, bsize, mi->tx_size);
    const int64_t idtx_rdcost =
        RDCOST(x->rdmult, idtx_rdc.rate, idtx_rdc.dist);
    if (idtx_rdcost < search_state.best_rdc.rdcost) {
      // Keep skip_txfm off if color_sensitivity is set, since the IDTX cost
      // does not account for chroma distortion.
      if (x->color_sensitivity[0] || x->color_sensitivity[1])
        idtx_rdc.skip_txfm = 0;
      search_state.best_pickmode.tx_type = IDTX;
      search_state.best_rdc.rdcost = idtx_rdcost;
      search_state.best_pickmode.best_mode_skip_txfm = idtx_rdc.skip_txfm;
      if (!idtx_rdc.skip_txfm) {
        memcpy(search_state.best_pickmode.blk_skip, txfm_info->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * num_8x8_blocks);
      }
      memset(ctx->tx_type_map, search_state.best_pickmode.tx_type,
             ctx->num_4x4_blk);
      memset(xd->tx_type_map, search_state.best_pickmode.tx_type,
             ctx->num_4x4_blk);
    }
    pd->dst = orig_dst;
  }

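  // Palette search for screen content: only worthwhile when the best mode so
  // far is intra and the block is textured or the frame has high source SAD.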
  int try_palette =
      !skip_idtx_palette && cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                        mi->bsize);
  try_palette = try_palette &&
                is_mode_intra(search_state.best_pickmode.best_mode) &&
                x->source_variance > 0 && !x->force_zeromv_skip_for_blk &&
                (cpi->rc.high_source_sad || x->source_variance > 500);

  if (try_palette) {
    const unsigned int intra_ref_frame_cost =
        search_state.ref_costs_single[INTRA_FRAME];

    av1_search_palette_mode_luma(cpi, x, bsize, intra_ref_frame_cost, ctx,
                                 &search_state.this_rdc,
                                 search_state.best_rdc.rdcost);
    if (search_state.this_rdc.rdcost < search_state.best_rdc.rdcost) {
      search_state.best_pickmode.pmi = mi->palette_mode_info;
      search_state.best_pickmode.best_mode = DC_PRED;
      mi->mv[0].as_int = 0;
      search_state.best_rdc.rate = search_state.this_rdc.rate;
      search_state.best_rdc.dist = search_state.this_rdc.dist;
      search_state.best_rdc.rdcost = search_state.this_rdc.rdcost;
      // Keep skip_txfm off if color_sensitivity is set, since the palette
      // search is luma-only; clear it before it is saved into best_pickmode.
      if (x->color_sensitivity[0] || x->color_sensitivity[1])
        search_state.this_rdc.skip_txfm = 0;
      search_state.best_pickmode.best_mode_skip_txfm =
          search_state.this_rdc.skip_txfm;
      if (!search_state.this_rdc.skip_txfm) {
        memcpy(ctx->blk_skip, txfm_info->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
      }
      if (xd->tx_type_map[0] != DCT_DCT)
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    }
  }

#if COLLECT_PICK_MODE_STAT
  aom_usec_timer_mark(&ms_stat.timer1);
  ms_stat.nonskipped_search_times[bsize][DC_PRED] +=
      aom_usec_timer_elapsed(&ms_stat.timer1);
#endif

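  // Restore the destination buffer and commit the final winning mode (which
  // may now be intra, IDTX or palette) to the mode info.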
  pd->dst = orig_dst;
  if (try_palette) mi->palette_mode_info = search_state.best_pickmode.pmi;
  mi->mode = search_state.best_pickmode.best_mode;
  mi->ref_frame[0] = search_state.best_pickmode.best_ref_frame;
  mi->ref_frame[1] = search_state.best_pickmode.best_second_ref_frame;
  txfm_info->skip_txfm = search_state.best_pickmode.best_mode_skip_txfm;
  if (!txfm_info->skip_txfm) {
    // For inter modes: copy blk_skip from best_pickmode, which is
    // defined for 8x8 blocks. If palette or an intra mode was selected
    // as best, then blk_skip has already been copied into the ctx.
    if (search_state.best_pickmode.best_mode >= INTRA_MODE_END)
      memcpy(ctx->blk_skip, search_state.best_pickmode.blk_skip,
             sizeof(search_state.best_pickmode.blk_skip[0]) * num_8x8_blocks);
  }
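  // Compound prediction in this path is always plain averaging; signal that
  // explicitly.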
  if (has_second_ref(mi)) {
    mi->comp_group_idx = 0;
    mi->compound_idx = 1;
    mi->interinter_comp.type = COMPOUND_AVERAGE;
  }

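  // The interpolation filter is not used for intra blocks; reset it to the
  // default.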
  if (!is_inter_block(mi)) {
    mi->interp_filters = av1_broadcast_interp_filter(SWITCHABLE_FILTERS);
  }

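  // When prediction buffers were reused during the search, the winning inter
  // prediction may still live in a scratch buffer; copy it into the actual
  // destination buffer.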
  if (reuse_inter_pred && search_state.best_pickmode.best_pred != NULL) {
    PRED_BUFFER *const best_pred = search_state.best_pickmode.best_pred;
    if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
      aom_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
                        pd->dst.stride, bw, bh);
    }
  }

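  // Run the temporal denoiser on the final selection and, if requested,
  // re-check the zero-mv mode against the denoised result.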
#if CONFIG_AV1_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0 && resize_pending == 0 &&
      denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow &&
      cpi->denoiser.reset == 0) {
    AV1_DENOISER_DECISION decision = COPY_BLOCK;
    ctx->sb_skip_denoising = 0;
    av1_pickmode_ctx_den_update(
        &ctx_den, zero_last_cost_orig, search_state.ref_costs_single,
        search_state.frame_mv, reuse_inter_pred, &search_state.best_pickmode);
    av1_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision,
                         gf_temporal_ref);
    if (denoise_recheck_zeromv)
      recheck_zeromv_after_denoising(
          cpi, mi, x, xd, decision, &ctx_den, search_state.yv12_mb,
          &search_state.best_rdc, &search_state.best_pickmode, bsize, mi_row,
          mi_col);
    search_state.best_pickmode.best_ref_frame = ctx_den.best_ref_frame;
  }
#endif

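  // Update the adaptive rd threshold frequency factors so that modes which
  // repeatedly lose to the best mode get pruned more aggressively in later
  // blocks.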
  if (cpi->sf.inter_sf.adaptive_rd_thresh && !has_second_ref(mi)) {
    THR_MODES best_mode_idx =
        mode_idx[search_state.best_pickmode.best_ref_frame]
                [mode_offset(mi->mode)];
    if (search_state.best_pickmode.best_ref_frame == INTRA_FRAME) {
      // Only consider the modes that are included in the intra_mode_list.
      const int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE);
      for (int i = 0; i < intra_modes; i++) {
        update_thresh_freq_fact(cpi, x, bsize, INTRA_FRAME, best_mode_idx,
                                intra_mode_list[i]);
      }
    } else {
      for (PREDICTION_MODE this_mode = NEARESTMV; this_mode <= NEWMV;
           ++this_mode) {
        update_thresh_freq_fact(cpi, x, bsize,
                                search_state.best_pickmode.best_ref_frame,
                                best_mode_idx, this_mode);
      }
    }
  }

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, mi->mode);
#else
  store_coding_context(x, ctx);
#endif  // CONFIG_INTERNAL_STATS

#if COLLECT_PICK_MODE_STAT
  aom_usec_timer_mark(&ms_stat.bsize_timer);
  ms_stat.total_block_times[bsize] +=
      aom_usec_timer_elapsed(&ms_stat.bsize_timer);
  print_time(&ms_stat, bsize, cm->mi_params.mi_rows, cm->mi_params.mi_cols,
             mi_row, mi_col);
#endif  // COLLECT_PICK_MODE_STAT

  *rd_cost = search_state.best_rdc;
}