1 /*
2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "av1/common/av1_common_int.h"
13 #include "av1/common/reconintra.h"
14
15 #include "av1/encoder/intra_mode_search.h"
16 #include "av1/encoder/intra_mode_search_utils.h"
17 #include "av1/encoder/palette.h"
18 #include "av1/encoder/speed_features.h"
19 #include "av1/encoder/tx_search.h"
20
// Number of entries in the rd cost array kept per delta angle. Even though
// there are 7 delta angles, this macro is set to 9 (i.e. two more entries than
// there are delta angles) to facilitate the rd threshold check to prune -3 and
// 3 delta angles.
// NOTE(review): the array sized by this macro is declared outside this part of
// the file.
#define SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY (2 * MAX_ANGLE_DELTA + 3)
24
// The order for evaluating delta angles while processing the luma directional
// intra modes. Currently, this order of evaluation is applicable only when
// speed feature prune_luma_odd_delta_angles_in_intra is enabled. In this case,
// even angles are evaluated first in order to facilitate the pruning of odd
// delta angles based on the rd costs of the neighboring delta angles.
// Only the non-zero delta angles are listed. set_y_mode_and_delta_angle()
// indexes this table with the position of a candidate inside its group of
// 2 * MAX_ANGLE_DELTA non-zero delta angles.
static const int8_t luma_delta_angles_order[2 * MAX_ANGLE_DELTA] = {
  -2, 2, -3, -1, 1, 3,
};
33
34 /*!\cond */
// Luma intra prediction modes listed in search order.
// set_y_mode_and_delta_angle() reads entry mode_idx from this table for every
// mode_idx below INTRA_MODE_END (those candidates use a zero delta angle).
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
  D67_PRED,      D113_PRED,     D45_PRED,
};
40
// Chroma intra prediction modes, one entry per UV_PREDICTION_MODE.
// NOTE(review): presumably the order in which the chroma rd search visits the
// modes (the name mirrors intra_rd_search_mode_order); the user of this table
// is outside this part of the file -- confirm against it.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
  UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
  UV_D113_PRED,   UV_D45_PRED,
};
47
// The bitmask corresponds to the filter intra modes as defined in enums.h
// FILTER_INTRA_MODE enumeration type. Setting a bit to 0 in the mask means to
// disable the evaluation of corresponding filter intra mode. The table
// av1_derived_filter_intra_mode_used_flag is used when speed feature
// prune_filter_intra_level is 1. The evaluated filter intra modes are union
// of the following:
// 1) FILTER_DC_PRED
// 2) mode that corresponds to best mode so far of DC_PRED, V_PRED, H_PRED,
// D157_PRED and PAETH_PRED. (Eg: FILTER_V_PRED if best mode so far is V_PRED).
//
// The table is indexed by the best luma intra mode found so far, and
// rd_pick_filter_intra_sby() tests bit (1 << mode) of the selected entry for
// each filter intra mode. Bit 0 is set in every entry.
static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = {
  0x01,  // DC_PRED:           0000 0001
  0x03,  // V_PRED:            0000 0011
  0x05,  // H_PRED:            0000 0101
  0x01,  // D45_PRED:          0000 0001
  0x01,  // D135_PRED:         0000 0001
  0x01,  // D113_PRED:         0000 0001
  0x09,  // D157_PRED:         0000 1001
  0x01,  // D203_PRED:         0000 0001
  0x01,  // D67_PRED:          0000 0001
  0x01,  // SMOOTH_PRED:       0000 0001
  0x01,  // SMOOTH_V_PRED:     0000 0001
  0x01,  // SMOOTH_H_PRED:     0000 0001
  0x11   // PAETH_PRED:        0001 0001
};
72
// The bitmask corresponds to the chroma intra modes as defined in enums.h
// UV_PREDICTION_MODE enumeration type. Setting a bit to 0 in the mask means to
// disable the evaluation of corresponding chroma intra mode. The table
// av1_derived_chroma_intra_mode_used_flag is used when speed feature
// prune_chroma_modes_using_luma_winner is enabled. The evaluated chroma
// intra modes are union of the following:
// 1) UV_DC_PRED
// 2) UV_SMOOTH_PRED
// 3) UV_CFL_PRED
// 4) mode that corresponds to luma intra mode winner (Eg : UV_V_PRED if luma
// intra mode winner is V_PRED).
//
// Every entry therefore contains the common pattern 0x2201 (bits 0, 9 and 13)
// plus at most one additional bit for the luma winner that indexes the entry.
static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = {
  0x2201,  // DC_PRED:           0010 0010 0000 0001
  0x2203,  // V_PRED:            0010 0010 0000 0011
  0x2205,  // H_PRED:            0010 0010 0000 0101
  0x2209,  // D45_PRED:          0010 0010 0000 1001
  0x2211,  // D135_PRED:         0010 0010 0001 0001
  0x2221,  // D113_PRED:         0010 0010 0010 0001
  0x2241,  // D157_PRED:         0010 0010 0100 0001
  0x2281,  // D203_PRED:         0010 0010 1000 0001
  0x2301,  // D67_PRED:          0010 0011 0000 0001
  0x2201,  // SMOOTH_PRED:       0010 0010 0000 0001
  0x2601,  // SMOOTH_V_PRED:     0010 0110 0000 0001
  0x2a01,  // SMOOTH_H_PRED:     0010 1010 0000 0001
  0x3201   // PAETH_PRED:        0011 0010 0000 0001
};
99
// Zero-filled buffers used by av1_calc_normalized_variance() as the reference
// operand of the variance function (always passed with a stride of 0):
// all_zeros for 8-bit buffers, and highbd_all_zeros (handed over through
// CONVERT_TO_BYTEPTR) for high bit-depth buffers.
DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 };
DECLARE_ALIGNED(16, static const uint16_t,
                highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
103
// Runs the variance function vf on buf (with the given stride) against a
// zero-filled reference that is read with stride 0, and returns vf's result.
// The high bit-depth zero buffer is selected when is_hbd is non-zero. The sse
// value that vf also produces is discarded.
int av1_calc_normalized_variance(aom_variance_fn_t vf, const uint8_t *const buf,
                                 const int stride, const int is_hbd) {
  unsigned int unused_sse;
  const uint8_t *const zero_ref =
      is_hbd ? CONVERT_TO_BYTEPTR(highbd_all_zeros) : all_zeros;
  return vf(buf, stride, zero_ref, 0, &unused_sse);
}
113
114 // Computes average of log(1 + variance) across 4x4 sub-blocks for source and
115 // reconstructed blocks.
compute_avg_log_variance(const AV1_COMP * const cpi,MACROBLOCK * x,const BLOCK_SIZE bs,double * avg_log_src_variance,double * avg_log_recon_variance)116 static void compute_avg_log_variance(const AV1_COMP *const cpi, MACROBLOCK *x,
117 const BLOCK_SIZE bs,
118 double *avg_log_src_variance,
119 double *avg_log_recon_variance) {
120 const MACROBLOCKD *const xd = &x->e_mbd;
121 const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
122 const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1);
123 const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1);
124 const int right_overflow =
125 (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
126 const int bottom_overflow =
127 (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
128 const int bw = (MI_SIZE * mi_size_wide[bs] - right_overflow);
129 const int bh = (MI_SIZE * mi_size_high[bs] - bottom_overflow);
130 const int is_hbd = is_cur_buf_hbd(xd);
131
132 for (int i = 0; i < bh; i += MI_SIZE) {
133 const int r = mi_row_in_sb + (i >> MI_SIZE_LOG2);
134 for (int j = 0; j < bw; j += MI_SIZE) {
135 const int c = mi_col_in_sb + (j >> MI_SIZE_LOG2);
136 const int mi_offset = r * mi_size_wide[sb_size] + c;
137 Block4x4VarInfo *block_4x4_var_info =
138 &x->src_var_info_of_4x4_sub_blocks[mi_offset];
139 int src_var = block_4x4_var_info->var;
140 double log_src_var = block_4x4_var_info->log_var;
141 // Compute average of log(1 + variance) for the source block from 4x4
142 // sub-block variance values. Calculate and store 4x4 sub-block variance
143 // and log(1 + variance), if the values present in
144 // src_var_of_4x4_sub_blocks are invalid. Reuse the same if it is readily
145 // available with valid values.
146 if (src_var < 0) {
147 src_var = av1_calc_normalized_variance(
148 cpi->ppi->fn_ptr[BLOCK_4X4].vf,
149 x->plane[0].src.buf + i * x->plane[0].src.stride + j,
150 x->plane[0].src.stride, is_hbd);
151 block_4x4_var_info->var = src_var;
152 log_src_var = log(1.0 + src_var / 16.0);
153 block_4x4_var_info->log_var = log_src_var;
154 } else {
155 // When source variance is already calculated and available for
156 // retrieval, check if log(1 + variance) is also available. If it is
157 // available, then retrieve from buffer. Else, calculate the same and
158 // store to the buffer.
159 if (log_src_var < 0) {
160 log_src_var = log(1.0 + src_var / 16.0);
161 block_4x4_var_info->log_var = log_src_var;
162 }
163 }
164 *avg_log_src_variance += log_src_var;
165
166 const int recon_var = av1_calc_normalized_variance(
167 cpi->ppi->fn_ptr[BLOCK_4X4].vf,
168 xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
169 xd->plane[0].dst.stride, is_hbd);
170 *avg_log_recon_variance += log(1.0 + recon_var / 16.0);
171 }
172 }
173
174 const int blocks = (bw * bh) / 16;
175 *avg_log_src_variance /= (double)blocks;
176 *avg_log_recon_variance /= (double)blocks;
177 }
178
179 // Returns a factor to be applied to the RD value based on how well the
180 // reconstructed block variance matches the source variance.
intra_rd_variance_factor(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bs)181 static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x,
182 BLOCK_SIZE bs) {
183 double threshold = INTRA_RD_VAR_THRESH(cpi->oxcf.speed);
184 // For non-positive threshold values, the comparison of source and
185 // reconstructed variances with threshold evaluates to false
186 // (src_var < threshold/rec_var < threshold) as these metrics are greater than
187 // than 0. Hence further calculations are skipped.
188 if (threshold <= 0) return 1.0;
189
190 double variance_rd_factor = 1.0;
191 double avg_log_src_variance = 0.0;
192 double avg_log_recon_variance = 0.0;
193 double var_diff = 0.0;
194
195 compute_avg_log_variance(cpi, x, bs, &avg_log_src_variance,
196 &avg_log_recon_variance);
197
198 // Dont allow 0 to prevent / 0 below.
199 avg_log_src_variance += 0.000001;
200 avg_log_recon_variance += 0.000001;
201
202 if (avg_log_src_variance >= avg_log_recon_variance) {
203 var_diff = (avg_log_src_variance - avg_log_recon_variance);
204 if ((var_diff > 0.5) && (avg_log_recon_variance < threshold)) {
205 variance_rd_factor = 1.0 + ((var_diff * 2) / avg_log_src_variance);
206 }
207 } else {
208 var_diff = (avg_log_recon_variance - avg_log_src_variance);
209 if ((var_diff > 0.5) && (avg_log_src_variance < threshold)) {
210 variance_rd_factor = 1.0 + (var_diff / (2 * avg_log_src_variance));
211 }
212 }
213
214 // Limit adjustment;
215 variance_rd_factor = AOMMIN(3.0, variance_rd_factor);
216
217 return variance_rd_factor;
218 }
219 /*!\endcond */
220
221 /*!\brief Search for the best filter_intra mode when coding intra frame.
222 *
223 * \ingroup intra_mode_search
224 * \callergraph
225 * This function loops through all filter_intra modes to find the best one.
226 *
227 * \return Returns 1 if a new filter_intra mode is selected; 0 otherwise.
228 */
rd_pick_filter_intra_sby(const AV1_COMP * const cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,BLOCK_SIZE bsize,int mode_cost,PREDICTION_MODE best_mode_so_far,int64_t * best_rd,int64_t * best_model_rd,PICK_MODE_CONTEXT * ctx)229 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
230 int *rate, int *rate_tokenonly,
231 int64_t *distortion, uint8_t *skippable,
232 BLOCK_SIZE bsize, int mode_cost,
233 PREDICTION_MODE best_mode_so_far,
234 int64_t *best_rd, int64_t *best_model_rd,
235 PICK_MODE_CONTEXT *ctx) {
236 // Skip the evaluation of filter intra modes.
237 if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0;
238
239 MACROBLOCKD *const xd = &x->e_mbd;
240 MB_MODE_INFO *mbmi = xd->mi[0];
241 int filter_intra_selected_flag = 0;
242 FILTER_INTRA_MODE mode;
243 TX_SIZE best_tx_size = TX_8X8;
244 FILTER_INTRA_MODE_INFO filter_intra_mode_info;
245 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
246 av1_zero(filter_intra_mode_info);
247 mbmi->filter_intra_mode_info.use_filter_intra = 1;
248 mbmi->mode = DC_PRED;
249 mbmi->palette_mode_info.palette_size[0] = 0;
250
251 // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have
252 // filter-intra as winner.
253 if (x->use_mb_mode_cache &&
254 !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra)
255 return 0;
256
257 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
258 int64_t this_rd;
259 RD_STATS tokenonly_rd_stats;
260 mbmi->filter_intra_mode_info.filter_intra_mode = mode;
261
262 if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) &&
263 !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] &
264 (1 << mode)))
265 continue;
266
267 // Skip the evaluation of modes that do not match with the winner mode in
268 // x->mb_mode_cache.
269 if (x->use_mb_mode_cache &&
270 mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode)
271 continue;
272
273 if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
274 continue;
275 }
276 av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
277 *best_rd);
278 if (tokenonly_rd_stats.rate == INT_MAX) continue;
279 const int this_rate =
280 tokenonly_rd_stats.rate +
281 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
282 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
283
284 // Visual quality adjustment based on recon vs source variance.
285 if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
286 this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
287 }
288
289 // Collect mode stats for multiwinner mode processing
290 const int txfm_search_done = 1;
291 store_winner_mode_stats(
292 &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
293 cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
294 if (this_rd < *best_rd) {
295 *best_rd = this_rd;
296 best_tx_size = mbmi->tx_size;
297 filter_intra_mode_info = mbmi->filter_intra_mode_info;
298 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
299 memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
300 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
301 *rate = this_rate;
302 *rate_tokenonly = tokenonly_rd_stats.rate;
303 *distortion = tokenonly_rd_stats.dist;
304 *skippable = tokenonly_rd_stats.skip_txfm;
305 filter_intra_selected_flag = 1;
306 }
307 }
308
309 if (filter_intra_selected_flag) {
310 mbmi->mode = DC_PRED;
311 mbmi->tx_size = best_tx_size;
312 mbmi->filter_intra_mode_info = filter_intra_mode_info;
313 av1_copy_array(ctx->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
314 return 1;
315 } else {
316 return 0;
317 }
318 }
319
// Builds a histogram of the 8-bit pixel values found in a rows x cols window
// of src (consecutive rows are stride elements apart) and reports how many
// distinct values occur.
//
// val_count must have room for 256 entries; it is cleared first, so on return
// val_count[v] is the number of pixels equal to v. *num_colors receives the
// number of non-zero histogram entries.
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                      int *val_count, int *num_colors) {
  const int max_pix_val = 1 << 8;
  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  int distinct = 0;
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int pix = src[r * stride + c];
      assert(pix < max_pix_val);
      // A bin that was still empty means a colour not seen before.
      if (val_count[pix]++ == 0) ++distinct;
    }
  }
  *num_colors = distinct;
}
337
// High bit-depth counterpart of av1_count_colors().
//
// src8 is converted with CONVERT_TO_SHORTPTR and read as 16-bit pixels over a
// rows x cols window (consecutive rows are stride elements apart). Two
// histograms are produced:
//  - bin_val_count (256 entries, always written): counts of the pixels after
//    they have been shifted down to the 8-bit domain. *num_color_bins receives
//    the number of non-empty bins.
//  - val_count (1 << bit_depth entries, optional): counts of the unmodified
//    pixel values. *num_colors receives the number of distinct values.
//
// val_count may be NULL; in that case neither val_count nor *num_colors is
// touched. bit_depth must be in the range [8, 12].
void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
                             int cols, int bit_depth, int *val_count,
                             int *bin_val_count, int *num_color_bins,
                             int *num_colors) {
  // A bit depth below 8 would make the down-shift amount negative.
  assert(bit_depth >= 8 && bit_depth <= 12);
  const int max_bin_val = 1 << 8;
  const int max_pix_val = 1 << bit_depth;
  const int bin_shift = bit_depth - 8;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);

  memset(bin_val_count, 0, max_bin_val * sizeof(bin_val_count[0]));
  if (val_count != NULL)
    memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int pix = src[r * stride + c];
      /*
       * Down-convert the pixels to 8-bit domain before counting.
       * This provides consistency of behavior for palette search
       * between lbd and hbd encodes. This down-converted pixels
       * are only used for calculating the threshold (n).
       */
      const int this_val = pix >> bin_shift;
      assert(this_val < max_bin_val);
      // In builds without asserts, ignore pixels that exceed the bit depth.
      // This also keeps the val_count index below max_pix_val.
      if (this_val >= max_bin_val) continue;
      ++bin_val_count[this_val];
      if (val_count != NULL) ++val_count[pix];
    }
  }

  // Count the colors based on 8-bit domain used to gate the palette path
  int num_bins = 0;
  for (int i = 0; i < max_bin_val; ++i) {
    if (bin_val_count[i]) ++num_bins;
  }
  *num_color_bins = num_bins;

  // Count the actual hbd colors used to create top_colors
  if (val_count != NULL) {
    int num_vals = 0;
    for (int i = 0; i < max_pix_val; ++i) {
      if (val_count[i]) ++num_vals;
    }
    *num_colors = num_vals;
  }
}
380
// Translates a luma search index into a prediction mode and a delta angle and
// stores both in mbmi.
//
// Indices below INTRA_MODE_END select the mode from
// intra_rd_search_mode_order with a delta angle of 0. The remaining indices
// are grouped in runs of 2 * MAX_ANGLE_DELTA entries, one run per mode
// starting at V_PRED, and each entry of a run selects one non-zero delta
// angle:
//  - reorder_delta_angle_eval != 0: the delta comes from
//    luma_delta_angles_order;
//  - otherwise the deltas are visited as -3, -2, -1, 1, 2, 3.
void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi,
                                int reorder_delta_angle_eval) {
  if (mode_idx < INTRA_MODE_END) {
    mbmi->mode = intra_rd_search_mode_order[mode_idx];
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
    return;
  }

  const int deltas_per_mode = MAX_ANGLE_DELTA * 2;
  const int directional_idx = mode_idx - INTRA_MODE_END;
  const int delta_slot = directional_idx % deltas_per_mode;
  mbmi->mode = directional_idx / deltas_per_mode + V_PRED;

  if (reorder_delta_angle_eval) {
    mbmi->angle_delta[PLANE_TYPE_Y] = luma_delta_angles_order[delta_slot];
    return;
  }
  // Slots 0..2 map to -3..-1 and slots 3..5 map to 1..3 (0 is skipped).
  if (delta_slot < 3) {
    mbmi->angle_delta[PLANE_TYPE_Y] = delta_slot - 3;
  } else {
    mbmi->angle_delta[PLANE_TYPE_Y] = delta_slot - 2;
  }
}
400
get_model_rd_index_for_pruning(const MACROBLOCK * const x,const INTRA_MODE_SPEED_FEATURES * const intra_sf)401 static AOM_INLINE int get_model_rd_index_for_pruning(
402 const MACROBLOCK *const x,
403 const INTRA_MODE_SPEED_FEATURES *const intra_sf) {
404 const int top_intra_model_count_allowed =
405 intra_sf->top_intra_model_count_allowed;
406 if (!intra_sf->adapt_top_model_rd_count_using_neighbors)
407 return top_intra_model_count_allowed - 1;
408
409 const MACROBLOCKD *const xd = &x->e_mbd;
410 const PREDICTION_MODE mode = xd->mi[0]->mode;
411 int model_rd_index_for_pruning = top_intra_model_count_allowed - 1;
412 int is_left_mode_neq_cur_mode = 0, is_above_mode_neq_cur_mode = 0;
413 if (xd->left_available)
414 is_left_mode_neq_cur_mode = xd->left_mbmi->mode != mode;
415 if (xd->up_available)
416 is_above_mode_neq_cur_mode = xd->above_mbmi->mode != mode;
417 // The pruning of luma intra modes is made more aggressive at lower quantizers
418 // and vice versa. The value for model_rd_index_for_pruning is derived as
419 // follows.
420 // qidx 0 to 127: Reduce the index of a candidate used for comparison only if
421 // the current mode does not match either of the available neighboring modes.
422 // qidx 128 to 255: Reduce the index of a candidate used for comparison only
423 // if the current mode does not match both the available neighboring modes.
424 if (x->qindex <= 127) {
425 if (is_left_mode_neq_cur_mode || is_above_mode_neq_cur_mode)
426 model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
427 } else {
428 if (is_left_mode_neq_cur_mode && is_above_mode_neq_cur_mode)
429 model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
430 }
431 return model_rd_index_for_pruning;
432 }
433
// Decides whether a luma intra mode should be pruned based on its model rd
// cost, and maintains the bookkeeping used for that decision.
//
// top_intra_model_rd holds max_model_cnt_allowed costs in ascending order.
// this_model_rd is first inserted at its sorted position (the largest entry
// drops out); it is not inserted if it is not smaller than any entry. The mode
// is then pruned (return 1) when
//  - the entry at model_rd_index_for_pruning is valid (not INT64_MAX) and
//    this_model_rd exceeds it, or
//  - this_model_rd is valid and exceeds 1.5 times *best_model_rd.
// Otherwise 0 is returned and *best_model_rd is lowered to this_model_rd if
// that is smaller.
int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
                       int64_t top_intra_model_rd[], int max_model_cnt_allowed,
                       int model_rd_index_for_pruning) {
  const double best_rd_scale = 1.50;
  const double top_rd_scale = 1.00;

  // Locate the first entry that this_model_rd beats.
  int slot = 0;
  while (slot < max_model_cnt_allowed &&
         this_model_rd >= top_intra_model_rd[slot]) {
    ++slot;
  }
  if (slot < max_model_cnt_allowed) {
    // Shift the tail down by one position and insert.
    for (int k = max_model_cnt_allowed - 1; k > slot; --k) {
      top_intra_model_rd[k] = top_intra_model_rd[k - 1];
    }
    top_intra_model_rd[slot] = this_model_rd;
  }

  const int64_t pruning_ref_rd = top_intra_model_rd[model_rd_index_for_pruning];
  if (pruning_ref_rd != INT64_MAX &&
      this_model_rd > top_rd_scale * pruning_ref_rd) {
    return 1;
  }
  if (this_model_rd != INT64_MAX &&
      this_model_rd > best_rd_scale * (*best_model_rd)) {
    return 1;
  }

  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  return 0;
}
459
460 // Run RD calculation with given chroma intra prediction angle., and return
461 // the RD cost. Update the best mode info. if the RD cost is the best so far.
pick_intra_angle_routine_sbuv(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int rate_overhead,int64_t best_rd_in,int * rate,RD_STATS * rd_stats,int * best_angle_delta,int64_t * best_rd)462 static int64_t pick_intra_angle_routine_sbuv(
463 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
464 int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
465 int *best_angle_delta, int64_t *best_rd) {
466 MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
467 assert(!is_inter_block(mbmi));
468 int this_rate;
469 int64_t this_rd;
470 RD_STATS tokenonly_rd_stats;
471
472 if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
473 return INT64_MAX;
474 this_rate = tokenonly_rd_stats.rate +
475 intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
476 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
477 if (this_rd < *best_rd) {
478 *best_rd = this_rd;
479 *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
480 *rate = this_rate;
481 rd_stats->rate = tokenonly_rd_stats.rate;
482 rd_stats->dist = tokenonly_rd_stats.dist;
483 rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm;
484 }
485 return this_rd;
486 }
487
488 /*!\brief Search for the best angle delta for chroma prediction
489 *
490 * \ingroup intra_mode_search
491 * \callergraph
492 * Given a chroma directional intra prediction mode, this function will try to
493 * estimate the best delta_angle.
494 *
495 * \returns Return if there is a new mode with smaller rdcost than best_rd.
496 */
rd_pick_intra_angle_sbuv(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int rate_overhead,int64_t best_rd,int * rate,RD_STATS * rd_stats)497 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
498 BLOCK_SIZE bsize, int rate_overhead,
499 int64_t best_rd, int *rate,
500 RD_STATS *rd_stats) {
501 MACROBLOCKD *const xd = &x->e_mbd;
502 MB_MODE_INFO *mbmi = xd->mi[0];
503 assert(!is_inter_block(mbmi));
504 int i, angle_delta, best_angle_delta = 0;
505 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
506
507 rd_stats->rate = INT_MAX;
508 rd_stats->skip_txfm = 0;
509 rd_stats->dist = INT64_MAX;
510 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
511
512 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
513 for (i = 0; i < 2; ++i) {
514 best_rd_in = (best_rd == INT64_MAX)
515 ? INT64_MAX
516 : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
517 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
518 this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
519 best_rd_in, rate, rd_stats,
520 &best_angle_delta, &best_rd);
521 rd_cost[2 * angle_delta + i] = this_rd;
522 if (angle_delta == 0) {
523 if (this_rd == INT64_MAX) return 0;
524 rd_cost[1] = this_rd;
525 break;
526 }
527 }
528 }
529
530 assert(best_rd != INT64_MAX);
531 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
532 int64_t rd_thresh;
533 for (i = 0; i < 2; ++i) {
534 int skip_search = 0;
535 rd_thresh = best_rd + (best_rd >> 5);
536 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
537 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
538 skip_search = 1;
539 if (!skip_search) {
540 mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
541 pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
542 rate, rd_stats, &best_angle_delta,
543 &best_rd);
544 }
545 }
546 }
547
548 mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta;
549 return rd_stats->rate != INT_MAX;
550 }
551
// Builds the joint CfL sign index from the sign `a` of the plane being
// evaluated and the sign `b` of the other chroma plane. For CFL_PRED_U the
// result is a * CFL_SIGNS + b - 1; for any other plane the roles of a and b
// are swapped. Every parameter is parenthesized so that expression arguments
// are evaluated as a unit.
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b)                \
  ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1         \
                         : (b) * CFL_SIGNS + (a) - 1)
554
// Splits a linear CfL index into a sign and an alpha magnitude index.
// CFL_INDEX_ZERO maps to (CFL_SIGN_ZERO, 0). An index k steps above it maps to
// (CFL_SIGN_POS, k - 1) and an index k steps below it to (CFL_SIGN_NEG, k - 1).
static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign,
                                      int *cfl_alpha) {
  const int offset = cfl_idx - CFL_INDEX_ZERO;
  if (offset > 0) {
    *cfl_sign = CFL_SIGN_POS;
    *cfl_alpha = offset - 1;
  } else if (offset < 0) {
    *cfl_sign = CFL_SIGN_NEG;
    *cfl_alpha = -offset - 1;
  } else {
    *cfl_sign = CFL_SIGN_ZERO;
    *cfl_alpha = 0;
  }
}
566
// Returns the cost of coding one chroma plane with the CfL parameter selected
// by cfl_idx.
//
// The block's cfl_alpha_signs / cfl_alpha_idx are overwritten for the duration
// of the evaluation and restored before returning.
//  - fast_mode != 0: the cost is the estimate from intra_model_rd(); rd_stats
//    must be NULL.
//  - fast_mode == 0: a transform search is run for the plane, rd_stats is
//    (re)initialised and filled, and rd_stats->rdcost is returned.
static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size,
                              BLOCK_SIZE plane_bsize, int cfl_idx,
                              int fast_mode, RD_STATS *rd_stats) {
  assert(IMPLIES(fast_mode, rd_stats == NULL));
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int pred_plane = get_cfl_pred_type(plane);

  CFL_SIGN_TYPE plane_sign;
  int plane_alpha;
  cfl_idx_to_sign_and_alpha(cfl_idx, &plane_sign, &plane_alpha);

  // Remember the block's current CfL parameters.
  const int8_t saved_alpha_signs = mbmi->cfl_alpha_signs;
  const uint8_t saved_alpha_idx = mbmi->cfl_alpha_idx;

  // We only build CFL for a given plane, the other plane's sign is dummy.
  const int other_plane_sign = CFL_SIGN_NEG;
  mbmi->cfl_alpha_signs =
      PLANE_SIGN_TO_JOINT_SIGN(pred_plane, plane_sign, other_plane_sign);
  mbmi->cfl_alpha_idx = (plane_alpha << CFL_ALPHABET_SIZE_LOG2) + plane_alpha;

  int64_t cost;
  if (!fast_mode) {
    av1_init_rd_stats(rd_stats);
    av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize,
                         tx_size, FTXS_NONE, 0);
    av1_rd_cost_update(x->rdmult, rd_stats);
    cost = rd_stats->rdcost;
  } else {
    cost = intra_model_rd(&cpi->common, x, plane, plane_bsize, tx_size,
                          /*use_hadamard=*/0);
  }

  mbmi->cfl_alpha_signs = saved_alpha_signs;
  mbmi->cfl_alpha_idx = saved_alpha_idx;
  return cost;
}
601
// Step directions applied to a CfL index while searching around a starting
// index: increasing indices first, then decreasing indices. Used by
// cfl_pick_plane_parameter() and cfl_pick_plane_rd().
static const int cfl_dir_ls[2] = { 1, -1 };
603
604 // If cfl_search_range is CFL_MAGS_SIZE, return zero. Otherwise return the index
605 // of the best alpha found using intra_model_rd().
cfl_pick_plane_parameter(const AV1_COMP * const cpi,MACROBLOCK * x,int plane,TX_SIZE tx_size,int cfl_search_range)606 static int cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
607 int plane, TX_SIZE tx_size,
608 int cfl_search_range) {
609 assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
610
611 if (cfl_search_range == CFL_MAGS_SIZE) return CFL_INDEX_ZERO;
612
613 const MACROBLOCKD *const xd = &x->e_mbd;
614 const MB_MODE_INFO *const mbmi = xd->mi[0];
615 assert(mbmi->uv_mode == UV_CFL_PRED);
616 const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
617 const BLOCK_SIZE plane_bsize =
618 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
619
620 int est_best_cfl_idx = CFL_INDEX_ZERO;
621 int fast_mode = 1;
622 int start_cfl_idx = CFL_INDEX_ZERO;
623 int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
624 start_cfl_idx, fast_mode, NULL);
625 for (int si = 0; si < 2; ++si) {
626 const int dir = cfl_dir_ls[si];
627 for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
628 int cfl_idx = start_cfl_idx + dir * i;
629 if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
630 int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
631 cfl_idx, fast_mode, NULL);
632 if (cfl_cost < best_cfl_cost) {
633 best_cfl_cost = cfl_cost;
634 est_best_cfl_idx = cfl_idx;
635 } else {
636 break;
637 }
638 }
639 }
640 return est_best_cfl_idx;
641 }
642
// Runs the full (non-model) RD evaluation of one chroma plane for the CfL
// indices around est_best_cfl_idx and stores the results in cfl_rd_arr.
//
// All CFL_MAGS_SIZE entries of cfl_rd_arr are first marked invalid. The entry
// at est_best_cfl_idx is always evaluated; in addition up to
// cfl_search_range - 1 neighbours on either side are evaluated, as far as they
// lie inside [0, CFL_MAGS_SIZE).
static void cfl_pick_plane_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size, int cfl_search_range,
                              RD_STATS cfl_rd_arr[CFL_MAGS_SIZE],
                              int est_best_cfl_idx) {
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->uv_mode == UV_CFL_PRED);
  const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
  const int use_model_rd = 0;

  for (int k = 0; k < CFL_MAGS_SIZE; ++k) {
    av1_invalid_rd_stats(&cfl_rd_arr[k]);
  }

  // The estimated best index itself.
  cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, est_best_cfl_idx,
                 use_model_rd, &cfl_rd_arr[est_best_cfl_idx]);

  // A search range of 1 means no refinement around the estimate.
  if (cfl_search_range == 1) return;

  for (int d = 0; d < 2; ++d) {
    const int step = cfl_dir_ls[d];
    for (int dist = 1; dist < cfl_search_range; ++dist) {
      const int candidate = est_best_cfl_idx + step * dist;
      if (candidate < 0 || candidate >= CFL_MAGS_SIZE) break;
      cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, candidate,
                     use_model_rd, &cfl_rd_arr[candidate]);
    }
  }
}
676
677 /*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
678 *
679 * \ingroup intra_mode_search
680 * \callergraph
681 *
682 * This function will use DCT_DCT followed by computing SATD (sum of absolute
683 * transformed differences) to estimate the RD score and find the best possible
684 * CFL parameter.
685 *
686 * Then the function will apply a full RD search near the best possible CFL
687 * parameter to find the best actual CFL parameter.
688 *
689 * Side effect:
690 * We use ths buffers in x->plane[] and xd->plane[] as throw-away buffers for RD
691 * search.
692 *
693 * \param[in] x Encoder prediction block structure.
694 * \param[in] cpi Top-level encoder instance structure.
695 * \param[in] tx_size Transform size.
696 * \param[in] ref_best_rd Reference best RD.
697 * \param[in] cfl_search_range The search range of full RD search near the
698 * estimated best CFL parameter.
699 *
700 * \param[out] best_rd_stats RD stats of the best CFL parameter
701 * \param[out] best_cfl_alpha_idx Best CFL alpha index
702 * \param[out] best_cfl_alpha_signs Best CFL joint signs
703 *
704 */
// Picks the pair of CfL alpha parameters (one per chroma plane) with the
// lowest joint rd cost.
//
// The per-plane candidates are first evaluated independently, then every
// valid (U, V) combination is merged and costed. On success the winning
// statistics are written to best_rd_stats, the packed alpha magnitudes to
// best_cfl_alpha_idx and the joint sign to best_cfl_alpha_signs.
//
// Returns 1 when a combination with rd cost below ref_best_rd was found.
// Returns 0 otherwise, in which case best_rd_stats is invalid and both CfL
// outputs are set to 0.
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
                             TX_SIZE tx_size, int64_t ref_best_rd,
                             int cfl_search_range, RD_STATS *best_rd_stats,
                             uint8_t *best_cfl_alpha_idx,
                             int8_t *best_cfl_alpha_signs) {
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
  MACROBLOCKD *const xd = &x->e_mbd;
  const ModeCosts *const costs = &x->mode_costs;
  RD_STATS rd_u[CFL_MAGS_SIZE];
  RD_STATS rd_v[CFL_MAGS_SIZE];

  av1_invalid_rd_stats(best_rd_stats);

  // The dc pred data does not depend on alpha, so allow it to be cached while
  // the per-plane evaluations below are running.
  xd->cfl.use_dc_pred_cache = 1;

  // Estimate the best alpha index of each chroma plane.
  const int est_idx_u =
      cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range);
  const int est_idx_v =
      cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range);

  // With cfl_search_range=1 there is no refinement around the estimate, so an
  // estimate of CfL index 0 on both planes means the CfL mode is invalid.
  const int cfl_is_invalid = cfl_search_range == 1 &&
                             est_idx_u == CFL_INDEX_ZERO &&
                             est_idx_v == CFL_INDEX_ZERO;

  if (!cfl_is_invalid) {
    // Compute the rd cost of each plane around the estimated alpha index.
    cfl_pick_plane_rd(cpi, x, 1, tx_size, cfl_search_range, rd_u, est_idx_u);
    cfl_pick_plane_rd(cpi, x, 2, tx_size, cfl_search_range, rd_v, est_idx_v);
  }

  // Reset the cache flags so that stale dc pred data is never picked up
  // unintentionally later on.
  xd->cfl.use_dc_pred_cache = 0;
  xd->cfl.dc_pred_is_cached[0] = 0;
  xd->cfl.dc_pred_is_cached[1] = 0;

  if (cfl_is_invalid) {
    // Report invalid CfL parameters.
    *best_cfl_alpha_idx = 0;
    *best_cfl_alpha_signs = 0;
    return 0;
  }

  for (int idx_u = 0; idx_u < CFL_MAGS_SIZE; ++idx_u) {
    if (rd_u[idx_u].rate == INT_MAX) continue;
    CFL_SIGN_TYPE sign_u;
    int alpha_u;
    cfl_idx_to_sign_and_alpha(idx_u, &sign_u, &alpha_u);

    for (int idx_v = 0; idx_v < CFL_MAGS_SIZE; ++idx_v) {
      if (rd_v[idx_v].rate == INT_MAX) continue;
      CFL_SIGN_TYPE sign_v;
      int alpha_v;
      cfl_idx_to_sign_and_alpha(idx_v, &sign_v, &alpha_v);

      // A zero sign on both planes is not a valid CfL parameter.
      if (sign_u == CFL_SIGN_ZERO && sign_v == CFL_SIGN_ZERO) continue;

      const int joint_sign = sign_u * CFL_SIGNS + sign_v - 1;
      RD_STATS joint_rd = rd_u[idx_u];
      av1_merge_rd_stats(&joint_rd, &rd_v[idx_v]);
      if (joint_rd.rate != INT_MAX) {
        // Add the signalling cost of the alpha of each plane.
        joint_rd.rate += costs->cfl_cost[joint_sign][CFL_PRED_U][alpha_u];
        joint_rd.rate += costs->cfl_cost[joint_sign][CFL_PRED_V][alpha_v];
      }
      av1_rd_cost_update(x->rdmult, &joint_rd);
      if (joint_rd.rdcost >= best_rd_stats->rdcost) continue;

      *best_rd_stats = joint_rd;
      *best_cfl_alpha_idx = (alpha_u << CFL_ALPHABET_SIZE_LOG2) + alpha_v;
      *best_cfl_alpha_signs = joint_sign;
    }
  }

  if (best_rd_stats->rdcost < ref_best_rd) return 1;

  // The best combination does not beat ref_best_rd: report invalid CfL
  // parameters.
  av1_invalid_rd_stats(best_rd_stats);
  *best_cfl_alpha_idx = 0;
  *best_cfl_alpha_signs = 0;
  return 0;
}
798
// Decides whether UV_SMOOTH_PRED can be skipped for this block. Pruning is
// only considered when the prune_smooth_intra_mode_for_chroma speed feature
// is enabled, and it is applied when the per-pixel source variance of every
// chroma plane is below 20 (an empirically derived threshold).
static bool should_prune_chroma_smooth_pred_based_on_source_variance(
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize) {
  if (cpi->sf.intra_sf.prune_smooth_intra_mode_for_chroma) {
    const int num_planes = av1_num_planes(&cpi->common);
    for (int plane = AOM_PLANE_U; plane < num_planes; ++plane) {
      const unsigned int src_variance = av1_get_perpixel_variance_facade(
          cpi, &x->e_mbd, &x->plane[plane].src, bsize, plane);
      // One plane at or above the threshold is enough to keep the mode.
      if (src_variance >= 20) return false;
    }
    return true;
  }
  return false;
}
812
// Searches the chroma intra prediction modes, followed by the chroma palette
// mode when it is allowed, and stores the winning mode info in xd->mi[0].
//
// On return *rate, *rate_tokenonly, *distortion and *skippable describe the
// winner and the function returns its rd cost. If the current block is not a
// chroma reference block, the outputs are set to 0, 0, 0 and 1 respectively
// and INT64_MAX is returned.
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, uint8_t *skippable,
                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  const ModeCosts *const mode_costs = &x->mode_costs;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));
  // Snapshot of the mode info taken before the chroma fields are reset.
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX;

  init_sbuv_mode(mbmi);

  // Nothing to search if the current block does not correspond to a chroma
  // block.
  if (!xd->is_chroma_ref) {
    *rate = 0;
    *rate_tokenonly = 0;
    *distortion = 0;
    *skippable = 1;
    return INT64_MAX;
  }

  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
  if (xd->cfl.store_y) {
    // Restore reconstructed luma values.
    // TODO(chiyotsai@google.com): right now we are re-computing the txfm in
    // this function everytime we search through uv modes. There is some
    // potential speed up here if we cache the result to avoid redundant
    // computation.
    av1_encode_intra_block_plane(cpi, x, mbmi->bsize, AOM_PLANE_Y,
                                 DRY_RUN_NORMAL,
                                 cpi->optimize_seg_arr[mbmi->segment_id]);
    xd->cfl.store_y = 0;
  }

  IntraModeSearchState intra_search_state;
  init_intra_mode_search_state(&intra_search_state);

  // Search through all non-palette modes.
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    const UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
    const int is_diagonal_mode = av1_is_diagonal_mode(get_uv_mode(mode));
    const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode));
    const int mode_bit = 1 << mode;
    RD_STATS tokenonly_rd_stats;
    int this_rate;

    // Modes disabled through the encoder configuration.
    if ((is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) ||
        (is_directional_mode && !intra_mode_cfg->enable_directional_intra))
      continue;

    // Modes masked out by the speed features for this transform size.
    if (!(sf->intra_sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          mode_bit))
      continue;

    if ((!intra_mode_cfg->enable_smooth_intra && mode >= UV_SMOOTH_PRED &&
         mode <= UV_SMOOTH_H_PRED) ||
        (!intra_mode_cfg->enable_paeth_intra && mode == UV_PAETH_PRED))
      continue;

    // Optionally restrict the chroma candidates based on the luma winner.
    assert(mbmi->mode < INTRA_MODES);
    if (sf->intra_sf.prune_chroma_modes_using_luma_winner &&
        !(av1_derived_chroma_intra_mode_used_flag[mbmi->mode] & mode_bit))
      continue;

    // Set up the candidate; cfl and angle delta specifics follow below.
    mbmi->uv_mode = mode;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;

    if (mode == UV_CFL_PRED) {
      if (!(is_cfl_allowed(xd) && intra_mode_cfg->enable_cfl_intra)) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      const int cfl_found = cfl_rd_pick_alpha(
          x, cpi, uv_tx_size, best_rd, sf->intra_sf.cfl_search_range,
          &tokenonly_rd_stats, &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs);
      if (!cfl_found) continue;
    } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
               intra_mode_cfg->enable_angle_delta) {
      // Build the directional skip mask once, on first use.
      if (sf->intra_sf.chroma_intra_pruning_with_hog &&
          !intra_search_state.dir_mode_skip_mask_ready) {
        static const float thresh[2][4] = {
          { -1.2f, 0.0f, 0.0f, 1.2f },    // Interframe
          { -1.2f, -1.2f, -0.6f, 0.4f },  // Intraframe
        };
        const int is_chroma = 1;
        const int is_intra_frame = frame_is_intra_only(cm);
        const float hog_thresh =
            thresh[is_intra_frame]
                  [sf->intra_sf.chroma_intra_pruning_with_hog - 1];
        prune_intra_mode_with_hog(
            x, bsize, cm->seq_params->sb_size, hog_thresh,
            intra_search_state.directional_mode_skip_mask, is_chroma);
        intra_search_state.dir_mode_skip_mask_ready = 1;
      }
      if (intra_search_state.directional_mode_skip_mask[mode]) continue;

      // Search through angle delta.
      const int rate_overhead =
          mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
      if (mode == UV_SMOOTH_PRED &&
          should_prune_chroma_smooth_pred_based_on_source_variance(cpi, x,
                                                                   bsize))
        continue;

      // No angle delta to search for: predict directly.
      if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd))
        continue;
    }

    const int mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
    this_rate = tokenonly_rd_stats.rate +
                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
    const int64_t this_rd =
        RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
    if (this_rd >= best_rd) continue;

    // New winner: record its mode info and statistics.
    best_mbmi = *mbmi;
    best_rd = this_rd;
    *rate = this_rate;
    *rate_tokenonly = tokenonly_rd_stats.rate;
    *distortion = tokenonly_rd_stats.dist;
    *skippable = tokenonly_rd_stats.skip_txfm;
  }

  // Search palette mode.
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                        mbmi->bsize);
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    const int dc_mode_cost =
        mode_costs
            ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED];
    av1_rd_pick_palette_intra_sbuv(cpi, x, dc_mode_cost,
                                   best_palette_color_map, &best_mbmi, &best_rd,
                                   rate, rate_tokenonly, distortion, skippable);
  }

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode.
  assert(best_rd < INT64_MAX);
  return best_rd;
}
970
971 // Searches palette mode for luma channel in inter frame.
av1_search_palette_mode(IntraModeSearchState * intra_search_state,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int ref_frame_cost,PICK_MODE_CONTEXT * ctx,RD_STATS * this_rd_cost,int64_t best_rd)972 int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
973 const AV1_COMP *cpi, MACROBLOCK *x,
974 BLOCK_SIZE bsize, unsigned int ref_frame_cost,
975 PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost,
976 int64_t best_rd) {
977 const AV1_COMMON *const cm = &cpi->common;
978 MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
979 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
980 const int num_planes = av1_num_planes(cm);
981 MACROBLOCKD *const xd = &x->e_mbd;
982 int rate2 = 0;
983 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd;
984 int skippable = 0;
985 uint8_t *const best_palette_color_map =
986 x->palette_buffer->best_palette_color_map;
987 uint8_t *const color_map = xd->plane[0].color_index_map;
988 MB_MODE_INFO best_mbmi_palette = *mbmi;
989 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
990 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
991 const ModeCosts *mode_costs = &x->mode_costs;
992 const int *const intra_mode_cost =
993 mode_costs->mbmode_cost[size_group_lookup[bsize]];
994 const int rows = block_size_high[bsize];
995 const int cols = block_size_wide[bsize];
996
997 mbmi->mode = DC_PRED;
998 mbmi->uv_mode = UV_DC_PRED;
999 mbmi->ref_frame[0] = INTRA_FRAME;
1000 mbmi->ref_frame[1] = NONE_FRAME;
1001 av1_zero(pmi->palette_size);
1002
1003 RD_STATS rd_stats_y;
1004 av1_invalid_rd_stats(&rd_stats_y);
1005 av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
1006 &best_mbmi_palette, best_palette_color_map,
1007 &best_rd_palette, &rd_stats_y.rate, NULL,
1008 &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
1009 ctx, best_blk_skip, best_tx_type_map);
1010 if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
1011 this_rd_cost->rdcost = INT64_MAX;
1012 return skippable;
1013 }
1014
1015 memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
1016 sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
1017 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
1018 memcpy(color_map, best_palette_color_map,
1019 rows * cols * sizeof(best_palette_color_map[0]));
1020
1021 skippable = rd_stats_y.skip_txfm;
1022 distortion2 = rd_stats_y.dist;
1023 rate2 = rd_stats_y.rate + ref_frame_cost;
1024 if (num_planes > 1) {
1025 if (intra_search_state->rate_uv_intra == INT_MAX) {
1026 // We have not found any good uv mode yet, so we need to search for it.
1027 TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
1028 av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
1029 &intra_search_state->rate_uv_tokenonly,
1030 &intra_search_state->dist_uvs,
1031 &intra_search_state->skip_uvs, bsize, uv_tx);
1032 intra_search_state->mode_uv = mbmi->uv_mode;
1033 intra_search_state->pmi_uv = *pmi;
1034 intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
1035 }
1036
1037 // We have found at least one good uv mode before, so copy and paste it
1038 // over.
1039 mbmi->uv_mode = intra_search_state->mode_uv;
1040 pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
1041 if (pmi->palette_size[1] > 0) {
1042 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
1043 intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE,
1044 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
1045 }
1046 mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta;
1047 skippable = skippable && intra_search_state->skip_uvs;
1048 distortion2 += intra_search_state->dist_uvs;
1049 rate2 += intra_search_state->rate_uv_intra;
1050 }
1051
1052 if (skippable) {
1053 rate2 -= rd_stats_y.rate;
1054 if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly;
1055 rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
1056 } else {
1057 rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
1058 }
1059 this_rd = RDCOST(x->rdmult, rate2, distortion2);
1060 this_rd_cost->rate = rate2;
1061 this_rd_cost->dist = distortion2;
1062 this_rd_cost->rdcost = this_rd;
1063 return skippable;
1064 }
1065
// Luma-only variant of the palette mode search: evaluates the luma palette
// mode and reports its rate, distortion, rd cost and skip flag through
// this_rd_cost. If no luma palette is found, only this_rd_cost->rdcost is
// written (set to INT64_MAX).
void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, unsigned int ref_frame_cost,
                                  PICK_MODE_CONTEXT *ctx,
                                  RD_STATS *this_rd_cost, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const ModeCosts *const mode_costs = &x->mode_costs;
  const int *const intra_mode_cost =
      mode_costs->mbmode_cost[size_group_lookup[bsize]];
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  uint8_t *const best_palette_color_map =
      x->palette_buffer->best_palette_color_map;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  MB_MODE_INFO best_mbmi_palette = *mbmi;
  int64_t best_rd_palette = best_rd;

  // Palette is searched on top of DC prediction of an intra block.
  mbmi->mode = DC_PRED;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  av1_zero(pmi->palette_size);

  RD_STATS rd_stats_y;
  av1_invalid_rd_stats(&rd_stats_y);
  av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
                                &best_mbmi_palette, best_palette_color_map,
                                &best_rd_palette, &rd_stats_y.rate, NULL,
                                &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
                                ctx, best_blk_skip, best_tx_type_map);
  if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
    // No usable luma palette.
    this_rd_cost->rdcost = INT64_MAX;
    return;
  }

  // Restore the transform search state and color map of the best palette.
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
  memcpy(color_map, best_palette_color_map,
         rows * cols * sizeof(best_palette_color_map[0]));

  rd_stats_y.rate += ref_frame_cost;

  if (rd_stats_y.skip_txfm) {
    // A skipped block only pays for the reference frame and the skip flag.
    rd_stats_y.rate =
        ref_frame_cost +
        mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
  } else {
    rd_stats_y.rate +=
        mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
  }

  const int64_t luma_rd = RDCOST(x->rdmult, rd_stats_y.rate, rd_stats_y.dist);
  this_rd_cost->rate = rd_stats_y.rate;
  this_rd_cost->dist = rd_stats_y.dist;
  this_rd_cost->rdcost = luma_rd;
  this_rd_cost->skip_txfm = rd_stats_y.skip_txfm;
}
1126
1127 /*!\brief Get the intra prediction by searching through tx_type and tx_size.
1128 *
1129 * \ingroup intra_mode_search
1130 * \callergraph
1131 * Currently this function is only used in the intra frame code path for
1132 * winner-mode processing.
1133 *
1134 * \return Returns whether the current mode is an improvement over best_rd.
1135 */
intra_block_yrd(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,const int * bmode_costs,int64_t * best_rd,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,MB_MODE_INFO * best_mbmi,PICK_MODE_CONTEXT * ctx)1136 static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
1137 BLOCK_SIZE bsize, const int *bmode_costs,
1138 int64_t *best_rd, int *rate,
1139 int *rate_tokenonly, int64_t *distortion,
1140 uint8_t *skippable,
1141 MB_MODE_INFO *best_mbmi,
1142 PICK_MODE_CONTEXT *ctx) {
1143 MACROBLOCKD *const xd = &x->e_mbd;
1144 MB_MODE_INFO *const mbmi = xd->mi[0];
1145 RD_STATS rd_stats;
1146 // In order to improve txfm search avoid rd based breakouts during winner
1147 // mode evaluation. Hence passing ref_best_rd as a maximum value
1148 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
1149 if (rd_stats.rate == INT_MAX) return 0;
1150 int this_rate_tokenonly = rd_stats.rate;
1151 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
1152 // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
1153 // in the tokenonly rate, but for intra blocks, tx_size is always coded
1154 // (prediction granularity), so we account for it in the full rate,
1155 // not the tokenonly rate.
1156 this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
1157 }
1158 const int this_rate =
1159 rd_stats.rate +
1160 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0);
1161 const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
1162 if (this_rd < *best_rd) {
1163 *best_mbmi = *mbmi;
1164 *best_rd = this_rd;
1165 *rate = this_rate;
1166 *rate_tokenonly = this_rate_tokenonly;
1167 *distortion = rd_stats.dist;
1168 *skippable = rd_stats.skip_txfm;
1169 av1_copy_array(ctx->blk_skip, x->txfm_search_info.blk_skip,
1170 ctx->num_4x4_blk);
1171 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1172 return 1;
1173 }
1174 return 0;
1175 }
1176
1177 /*!\brief Search for the best filter_intra mode when coding inter frame.
1178 *
1179 * \ingroup intra_mode_search
1180 * \callergraph
1181 * This function loops through all filter_intra modes to find the best one.
1182 *
1183 * \remark Returns nothing, but updates the mbmi and rd_stats.
1184 */
handle_filter_intra_mode(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,const PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats_y,int mode_cost,int64_t best_rd,int64_t best_rd_so_far)1185 static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
1186 BLOCK_SIZE bsize,
1187 const PICK_MODE_CONTEXT *ctx,
1188 RD_STATS *rd_stats_y, int mode_cost,
1189 int64_t best_rd,
1190 int64_t best_rd_so_far) {
1191 MACROBLOCKD *const xd = &x->e_mbd;
1192 MB_MODE_INFO *const mbmi = xd->mi[0];
1193 assert(mbmi->mode == DC_PRED &&
1194 av1_filter_intra_allowed_bsize(&cpi->common, bsize));
1195
1196 RD_STATS rd_stats_y_fi;
1197 int filter_intra_selected_flag = 0;
1198 TX_SIZE best_tx_size = mbmi->tx_size;
1199 FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
1200 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1201 memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1202 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1203 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1204 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1205 mbmi->filter_intra_mode_info.use_filter_intra = 1;
1206 for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES;
1207 ++fi_mode) {
1208 mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
1209 av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
1210 if (rd_stats_y_fi.rate == INT_MAX) continue;
1211 const int this_rate_tmp =
1212 rd_stats_y_fi.rate +
1213 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1214 const int64_t this_rd_tmp =
1215 RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
1216
1217 if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) {
1218 break;
1219 }
1220 if (this_rd_tmp < best_rd_so_far) {
1221 best_tx_size = mbmi->tx_size;
1222 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1223 memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1224 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1225 best_fi_mode = fi_mode;
1226 *rd_stats_y = rd_stats_y_fi;
1227 filter_intra_selected_flag = 1;
1228 best_rd_so_far = this_rd_tmp;
1229 }
1230 }
1231
1232 mbmi->tx_size = best_tx_size;
1233 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
1234 memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
1235 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1236
1237 if (filter_intra_selected_flag) {
1238 mbmi->filter_intra_mode_info.use_filter_intra = 1;
1239 mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
1240 } else {
1241 mbmi->filter_intra_mode_info.use_filter_intra = 0;
1242 }
1243 }
1244
1245 // Evaluate a given luma intra-mode in inter frames.
av1_handle_intra_y_mode(IntraModeSearchState * intra_search_state,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int ref_frame_cost,const PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats_y,int64_t best_rd,int * mode_cost_y,int64_t * rd_y,int64_t * best_model_rd,int64_t top_intra_model_rd[])1246 int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
1247 const AV1_COMP *cpi, MACROBLOCK *x,
1248 BLOCK_SIZE bsize, unsigned int ref_frame_cost,
1249 const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y,
1250 int64_t best_rd, int *mode_cost_y, int64_t *rd_y,
1251 int64_t *best_model_rd,
1252 int64_t top_intra_model_rd[]) {
1253 const AV1_COMMON *cm = &cpi->common;
1254 const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1255 MACROBLOCKD *const xd = &x->e_mbd;
1256 MB_MODE_INFO *const mbmi = xd->mi[0];
1257 assert(mbmi->ref_frame[0] == INTRA_FRAME);
1258 const PREDICTION_MODE mode = mbmi->mode;
1259 const ModeCosts *mode_costs = &x->mode_costs;
1260 const int mode_cost =
1261 mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost;
1262 const int skip_ctx = av1_get_skip_txfm_context(xd);
1263
1264 int known_rate = mode_cost;
1265 const int intra_cost_penalty = av1_get_intra_cost_penalty(
1266 cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
1267 cm->seq_params->bit_depth);
1268
1269 if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty;
1270 known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0],
1271 mode_costs->skip_txfm_cost[skip_ctx][1]);
1272 const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
1273 if (known_rd > best_rd) {
1274 intra_search_state->skip_intra_modes = 1;
1275 return 0;
1276 }
1277
1278 const int is_directional_mode = av1_is_directional_mode(mode);
1279 if (is_directional_mode && av1_use_angle_delta(bsize) &&
1280 cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
1281 if (intra_sf->intra_pruning_with_hog &&
1282 !intra_search_state->dir_mode_skip_mask_ready) {
1283 const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f };
1284 const int is_chroma = 0;
1285 prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size,
1286 thresh[intra_sf->intra_pruning_with_hog - 1],
1287 intra_search_state->directional_mode_skip_mask,
1288 is_chroma);
1289 intra_search_state->dir_mode_skip_mask_ready = 1;
1290 }
1291 if (intra_search_state->directional_mode_skip_mask[mode]) return 0;
1292 }
1293 const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1294 const int64_t this_model_rd =
1295 intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1296
1297 const int model_rd_index_for_pruning =
1298 get_model_rd_index_for_pruning(x, intra_sf);
1299
1300 if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd,
1301 intra_sf->top_intra_model_count_allowed,
1302 model_rd_index_for_pruning))
1303 return 0;
1304 av1_init_rd_stats(rd_stats_y);
1305 av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd);
1306
1307 // Pick filter intra modes.
1308 if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
1309 int try_filter_intra = 1;
1310 int64_t best_rd_so_far = INT64_MAX;
1311 if (rd_stats_y->rate != INT_MAX) {
1312 // best_rd_so_far is the rdcost of DC_PRED without using filter_intra.
1313 // Later, in filter intra search, best_rd_so_far is used for comparison.
1314 mbmi->filter_intra_mode_info.use_filter_intra = 0;
1315 const int tmp_rate =
1316 rd_stats_y->rate +
1317 intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1318 best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
1319 try_filter_intra = (best_rd_so_far / 2) <= best_rd;
1320 } else if (intra_sf->skip_filter_intra_in_inter_frames >= 1) {
1321 // As rd cost of luma intra dc mode is more than best_rd (i.e.,
1322 // rd_stats_y->rate = INT_MAX), skip the evaluation of filter intra modes.
1323 try_filter_intra = 0;
1324 }
1325
1326 if (try_filter_intra) {
1327 handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost,
1328 best_rd, best_rd_so_far);
1329 }
1330 }
1331
1332 if (rd_stats_y->rate == INT_MAX) return 0;
1333
1334 *mode_cost_y = intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1335 const int rate_y = rd_stats_y->skip_txfm
1336 ? mode_costs->skip_txfm_cost[skip_ctx][1]
1337 : rd_stats_y->rate;
1338 *rd_y = RDCOST(x->rdmult, rate_y + *mode_cost_y, rd_stats_y->dist);
1339 if (best_rd < (INT64_MAX / 2) && *rd_y > (best_rd + (best_rd >> 2))) {
1340 intra_search_state->skip_intra_modes = 1;
1341 return 0;
1342 }
1343
1344 return 1;
1345 }
1346
// Runs the chroma intra mode search for an intra block in an inter frame
// (when the search state does not hold a result yet) and copies the chroma
// winner into mbmi and rd_stats_uv.
//
// Returns 1 on success. Returns 0, and sets
// intra_search_state->skip_intra_modes, when the token-only rd cost of the
// chroma winner already exceeds best_rd.
int av1_search_intra_uv_modes_in_interframe(
    IntraModeSearchState *intra_search_state, const AV1_COMP *cpi,
    MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats,
    const RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->ref_frame[0] == INTRA_FRAME);

  // TODO(chiyotsai@google.com): Consolidate the chroma search code here with
  // the one in av1_search_palette_mode.
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                        mbmi->bsize);

  assert(intra_search_state->rate_uv_intra == INT_MAX);
  if (intra_search_state->rate_uv_intra == INT_MAX) {
    // No uv predictor has been found so far: search for one and cache the
    // result in the search state.
    const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
    av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
                                &intra_search_state->rate_uv_tokenonly,
                                &intra_search_state->dist_uvs,
                                &intra_search_state->skip_uvs, bsize, uv_tx);
    intra_search_state->mode_uv = mbmi->uv_mode;
    if (try_palette) intra_search_state->pmi_uv = *pmi;
    intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];

    const int tokenonly_rate_uv = intra_search_state->rate_uv_tokenonly;
    const int64_t dist_uv = intra_search_state->dist_uvs;
    const int64_t tokenonly_rd_uv =
        RDCOST(x->rdmult, tokenonly_rate_uv, dist_uv);
    if (tokenonly_rd_uv > best_rd) {
      // If there is no good intra uv-mode available, we can skip all intra
      // modes.
      intra_search_state->skip_intra_modes = 1;
      return 0;
    }
  }

  // If we are here, then the encoder has found at least one good intra uv
  // predictor, so we can directly copy its statistics over.
  // TODO(any): the stats here is not right if the best uv mode is CFL but the
  // best y mode is palette.
  rd_stats_uv->rate = intra_search_state->rate_uv_tokenonly;
  rd_stats_uv->dist = intra_search_state->dist_uvs;
  rd_stats_uv->skip_txfm = intra_search_state->skip_uvs;
  rd_stats->skip_txfm = rd_stats_y->skip_txfm && rd_stats_uv->skip_txfm;

  mbmi->uv_mode = intra_search_state->mode_uv;
  mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta;
  if (try_palette) {
    // Copy over the chroma palette size and the chroma part of the colors.
    pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
    memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
           intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE,
           2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
  }

  return 1;
}
1405
1406 // Checks if odd delta angles can be pruned based on rdcosts of even delta
1407 // angles of the corresponding directional mode.
prune_luma_odd_delta_angles_using_rd_cost(const MB_MODE_INFO * const mbmi,const int64_t * const intra_modes_rd_cost,int64_t best_rd,int prune_luma_odd_delta_angles_in_intra)1408 static AOM_INLINE int prune_luma_odd_delta_angles_using_rd_cost(
1409 const MB_MODE_INFO *const mbmi, const int64_t *const intra_modes_rd_cost,
1410 int64_t best_rd, int prune_luma_odd_delta_angles_in_intra) {
1411 const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
1412 if (!prune_luma_odd_delta_angles_in_intra ||
1413 !av1_is_directional_mode(mbmi->mode) || !(abs(luma_delta_angle) & 1) ||
1414 best_rd == INT64_MAX)
1415 return 0;
1416
1417 const int64_t rd_thresh = best_rd + (best_rd >> 3);
1418
1419 // Neighbour rdcosts are considered for pruning of odd delta angles as
1420 // mentioned below:
1421 // Delta angle Delta angle rdcost
1422 // to be pruned to be considered
1423 // -3 -2
1424 // -1 -2, 0
1425 // 1 0, 2
1426 // 3 2
1427 return intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA] > rd_thresh &&
1428 intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA + 2] >
1429 rd_thresh;
1430 }
1431
1432 // Finds the best non-intrabc mode on an intra frame.
av1_rd_pick_intra_sby_mode(const AV1_COMP * const cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,BLOCK_SIZE bsize,int64_t best_rd,PICK_MODE_CONTEXT * ctx)1433 int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
1434 int *rate, int *rate_tokenonly,
1435 int64_t *distortion, uint8_t *skippable,
1436 BLOCK_SIZE bsize, int64_t best_rd,
1437 PICK_MODE_CONTEXT *ctx) {
1438 MACROBLOCKD *const xd = &x->e_mbd;
1439 MB_MODE_INFO *const mbmi = xd->mi[0];
1440 assert(!is_inter_block(mbmi));
1441 int64_t best_model_rd = INT64_MAX;
1442 int is_directional_mode;
1443 uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 };
1444 // Flag to check rd of any intra mode is better than best_rd passed to this
1445 // function
1446 int beat_best_rd = 0;
1447 const int *bmode_costs;
1448 const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
1449 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1450 const int try_palette =
1451 cpi->oxcf.tool_cfg.enable_palette &&
1452 av1_allow_palette(cpi->common.features.allow_screen_content_tools,
1453 mbmi->bsize);
1454 uint8_t *best_palette_color_map =
1455 try_palette ? x->palette_buffer->best_palette_color_map : NULL;
1456 const MB_MODE_INFO *above_mi = xd->above_mbmi;
1457 const MB_MODE_INFO *left_mi = xd->left_mbmi;
1458 const PREDICTION_MODE A = av1_above_block_mode(above_mi);
1459 const PREDICTION_MODE L = av1_left_block_mode(left_mi);
1460 const int above_ctx = intra_mode_context[A];
1461 const int left_ctx = intra_mode_context[L];
1462 bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
1463
1464 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
1465 const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1466 if (intra_sf->intra_pruning_with_hog) {
1467 // Less aggressive thresholds are used here than those used in inter frame
1468 // encoding in av1_handle_intra_y_mode() because we want key frames/intra
1469 // frames to have higher quality.
1470 const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f };
1471 const int is_chroma = 0;
1472 prune_intra_mode_with_hog(x, bsize, cpi->common.seq_params->sb_size,
1473 thresh[intra_sf->intra_pruning_with_hog - 1],
1474 directional_mode_skip_mask, is_chroma);
1475 }
1476 mbmi->filter_intra_mode_info.use_filter_intra = 0;
1477 pmi->palette_size[0] = 0;
1478
1479 // Set params for mode evaluation
1480 set_mode_eval_params(cpi, x, MODE_EVAL);
1481
1482 MB_MODE_INFO best_mbmi = *mbmi;
1483 const int max_winner_mode_count =
1484 winner_mode_count_allowed[cpi->sf.winner_mode_sf.multi_winner_mode_type];
1485 zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
1486 x->winner_mode_count = 0;
1487
1488 // Searches the intra-modes except for intrabc, palette, and filter_intra.
1489 int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
1490 for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
1491 top_intra_model_rd[i] = INT64_MAX;
1492 }
1493
1494 // Initialize the rdcost corresponding to all the directional and
1495 // non-directional intra modes.
1496 // 1. For directional modes, it stores the rdcost values for delta angles -4,
1497 // -3, ..., 3, 4.
1498 // 2. The rdcost value for luma_delta_angle is stored at index
1499 // luma_delta_angle + MAX_ANGLE_DELTA + 1.
1500 // 3. The rdcost values for fictitious/nonexistent luma_delta_angle -4 and 4
1501 // (array indices 0 and 8) are always set to INT64_MAX (the initial value).
1502 int64_t intra_modes_rd_cost[INTRA_MODE_END]
1503 [SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY];
1504 for (int i = 0; i < INTRA_MODE_END; i++) {
1505 for (int j = 0; j < SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY; j++) {
1506 intra_modes_rd_cost[i][j] = INT64_MAX;
1507 }
1508 }
1509
1510 for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
1511 ++mode_idx) {
1512 set_y_mode_and_delta_angle(mode_idx, mbmi,
1513 intra_sf->prune_luma_odd_delta_angles_in_intra);
1514 RD_STATS this_rd_stats;
1515 int this_rate, this_rate_tokenonly, s;
1516 int is_diagonal_mode;
1517 int64_t this_distortion, this_rd;
1518 const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
1519
1520 is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode);
1521 if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue;
1522 if (av1_is_directional_mode(mbmi->mode) &&
1523 !intra_mode_cfg->enable_directional_intra)
1524 continue;
1525
1526 // The smooth prediction mode appears to be more frequently picked
1527 // than horizontal / vertical smooth prediction modes. Hence treat
1528 // them differently in speed features.
1529 if ((!intra_mode_cfg->enable_smooth_intra ||
1530 intra_sf->disable_smooth_intra) &&
1531 (mbmi->mode == SMOOTH_H_PRED || mbmi->mode == SMOOTH_V_PRED))
1532 continue;
1533 if (!intra_mode_cfg->enable_smooth_intra && mbmi->mode == SMOOTH_PRED)
1534 continue;
1535
1536 // The functionality of filter intra modes and smooth prediction
1537 // overlap. Hence smooth prediction is pruned only if all the
1538 // filter intra modes are enabled.
1539 if (intra_sf->disable_smooth_intra &&
1540 intra_sf->prune_filter_intra_level == 0 && mbmi->mode == SMOOTH_PRED)
1541 continue;
1542 if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
1543 continue;
1544
1545 // Skip the evaluation of modes that do not match with the winner mode in
1546 // x->mb_mode_cache.
1547 if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue;
1548
1549 is_directional_mode = av1_is_directional_mode(mbmi->mode);
1550 if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
1551 if (is_directional_mode &&
1552 !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
1553 luma_delta_angle != 0)
1554 continue;
1555
1556 // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
1557 if (!(intra_sf->intra_y_mode_mask[max_txsize_lookup[bsize]] &
1558 (1 << mbmi->mode)))
1559 continue;
1560
1561 if (prune_luma_odd_delta_angles_using_rd_cost(
1562 mbmi, intra_modes_rd_cost[mbmi->mode], best_rd,
1563 intra_sf->prune_luma_odd_delta_angles_in_intra))
1564 continue;
1565
1566 const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1567 const int64_t this_model_rd =
1568 intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1569
1570 const int model_rd_index_for_pruning =
1571 get_model_rd_index_for_pruning(x, intra_sf);
1572
1573 if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd,
1574 intra_sf->top_intra_model_count_allowed,
1575 model_rd_index_for_pruning))
1576 continue;
1577
1578 // Builds the actual prediction. The estimate from intra_model_rd() above
1579 // (used for pruning) was only a model that did not take into account the
1580 // effect of the txfm pipeline, so we need to redo the evaluation for real
1581 // here.
1582 av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
1583 this_rate_tokenonly = this_rd_stats.rate;
1584 this_distortion = this_rd_stats.dist;
1585 s = this_rd_stats.skip_txfm;
1586
1587 if (this_rate_tokenonly == INT_MAX) continue;
1588
1589 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
1590 // av1_pick_uniform_tx_size_type_yrd above includes the cost of the
1591 // tx_size in the tokenonly rate, but for intra blocks, tx_size is always
1592 // coded (prediction granularity), so we account for it in the full rate,
1593 // not the tokenonly rate.
1594 this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
1595 }
1596 this_rate =
1597 this_rd_stats.rate +
1598 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0);
1599 this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
1600
1601 // Visual quality adjustment based on recon vs source variance.
1602 if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
1603 this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
1604 }
1605
1606 intra_modes_rd_cost[mbmi->mode][luma_delta_angle + MAX_ANGLE_DELTA + 1] =
1607 this_rd;
1608
1609 // Collect mode stats for multiwinner mode processing
1610 const int txfm_search_done = 1;
1611 store_winner_mode_stats(
1612 &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
1613 cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
1614 if (this_rd < best_rd) {
1615 best_mbmi = *mbmi;
1616 best_rd = this_rd;
1617 // Setting beat_best_rd flag because current mode rd is better than
1618 // best_rd passed to this function
1619 beat_best_rd = 1;
1620 *rate = this_rate;
1621 *rate_tokenonly = this_rate_tokenonly;
1622 *distortion = this_distortion;
1623 *skippable = s;
1624 memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
1625 sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1626 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1627 }
1628 }
1629
1630 // Searches palette
1631 if (try_palette) {
1632 av1_rd_pick_palette_intra_sby(
1633 cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
1634 &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd,
1635 ctx, ctx->blk_skip, ctx->tx_type_map);
1636 }
1637
1638 // Searches filter_intra
1639 if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
1640 if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
1641 skippable, bsize, bmode_costs[DC_PRED],
1642 best_mbmi.mode, &best_rd, &best_model_rd,
1643 ctx)) {
1644 best_mbmi = *mbmi;
1645 }
1646 }
1647
1648 // No mode was found with an rd value lower than the best_rd passed to this
1649 // function. In that case winner mode processing is not necessary, so return
1650 // INT64_MAX to indicate that no best mode was identified.
1651 if (!beat_best_rd) return INT64_MAX;
1652
1653 // In multi-winner mode processing, perform tx search for a few of the best
1654 // modes identified during mode evaluation. Winner mode processing uses the
1655 // best tx configuration for tx search.
1656 if (cpi->sf.winner_mode_sf.multi_winner_mode_type) {
1657 int best_mode_idx = 0;
1658 int block_width, block_height;
1659 uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map;
1660 av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
1661 &block_height, NULL, NULL);
1662
1663 for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
1664 *mbmi = x->winner_mode_stats[mode_idx].mbmi;
1665 if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
1666 // Restore color_map of palette mode before winner mode processing
1667 if (mbmi->palette_mode_info.palette_size[0] > 0) {
1668 uint8_t *color_map_src =
1669 x->winner_mode_stats[mode_idx].color_index_map;
1670 memcpy(color_map_dst, color_map_src,
1671 block_width * block_height * sizeof(*color_map_src));
1672 }
1673 // Set params for winner mode evaluation
1674 set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1675
1676 // Winner mode processing
1677 // If previous searches use only the default tx type/no R-D optimization
1678 // of quantized coeffs, do an extra search for the best tx type/better
1679 // R-D optimization of quantized coeffs
1680 if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1681 rate_tokenonly, distortion, skippable, &best_mbmi,
1682 ctx))
1683 best_mode_idx = mode_idx;
1684 }
1685 }
1686 // Copy color_map of palette mode for final winner mode
1687 if (best_mbmi.palette_mode_info.palette_size[0] > 0) {
1688 uint8_t *color_map_src =
1689 x->winner_mode_stats[best_mode_idx].color_index_map;
1690 memcpy(color_map_dst, color_map_src,
1691 block_width * block_height * sizeof(*color_map_src));
1692 }
1693 } else {
1694 // If previous searches use only the default tx type/no R-D optimization of
1695 // quantized coeffs, do an extra search for the best tx type/better R-D
1696 // optimization of quantized coeffs
1697 if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
1698 // Set params for winner mode evaluation
1699 set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1700 *mbmi = best_mbmi;
1701 intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1702 rate_tokenonly, distortion, skippable, &best_mbmi, ctx);
1703 }
1704 }
1705 *mbmi = best_mbmi;
1706 av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
1707 return best_rd;
1708 }
1709