• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "av1/common/av1_common_int.h"
13 #include "av1/common/cfl.h"
14 #include "av1/common/reconintra.h"
15 
16 #include "av1/encoder/intra_mode_search.h"
17 #include "av1/encoder/intra_mode_search_utils.h"
18 #include "av1/encoder/palette.h"
19 #include "av1/encoder/speed_features.h"
20 #include "av1/encoder/tx_search.h"
21 
// Number of entries in the per-delta-angle rd cost array. Even though there
// are only 7 delta angles, the size is set to 9 to facilitate the rd threshold
// check that prunes the -3 and 3 delta angles.
#define SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY (2 * MAX_ANGLE_DELTA + 3)
25 
26 // The order for evaluating delta angles while processing the luma directional
27 // intra modes. Currently, this order of evaluation is applicable only when
28 // speed feature prune_luma_odd_delta_angles_in_intra is enabled. In this case,
29 // even angles are evaluated first in order to facilitate the pruning of odd
30 // delta angles based on the rd costs of the neighboring delta angles.
31 static const int8_t luma_delta_angles_order[2 * MAX_ANGLE_DELTA] = {
32   -2, 2, -3, -1, 1, 3,
33 };
34 
35 /*!\cond */
36 static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
37   DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
38   SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
39   D67_PRED,      D113_PRED,     D45_PRED,
40 };
41 
42 static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
43   UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
44   UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
45   UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
46   UV_D113_PRED,   UV_D45_PRED,
47 };
48 
49 // The bitmask corresponds to the filter intra modes as defined in enums.h
50 // FILTER_INTRA_MODE enumeration type. Setting a bit to 0 in the mask means to
51 // disable the evaluation of corresponding filter intra mode. The table
52 // av1_derived_filter_intra_mode_used_flag is used when speed feature
53 // prune_filter_intra_level is 1. The evaluated filter intra modes are union
54 // of the following:
55 // 1) FILTER_DC_PRED
56 // 2) mode that corresponds to best mode so far of DC_PRED, V_PRED, H_PRED,
57 // D157_PRED and PAETH_PRED. (Eg: FILTER_V_PRED if best mode so far is V_PRED).
58 static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = {
59   0x01,  // DC_PRED:           0000 0001
60   0x03,  // V_PRED:            0000 0011
61   0x05,  // H_PRED:            0000 0101
62   0x01,  // D45_PRED:          0000 0001
63   0x01,  // D135_PRED:         0000 0001
64   0x01,  // D113_PRED:         0000 0001
65   0x09,  // D157_PRED:         0000 1001
66   0x01,  // D203_PRED:         0000 0001
67   0x01,  // D67_PRED:          0000 0001
68   0x01,  // SMOOTH_PRED:       0000 0001
69   0x01,  // SMOOTH_V_PRED:     0000 0001
70   0x01,  // SMOOTH_H_PRED:     0000 0001
71   0x11   // PAETH_PRED:        0001 0001
72 };
73 
74 // The bitmask corresponds to the chroma intra modes as defined in enums.h
75 // UV_PREDICTION_MODE enumeration type. Setting a bit to 0 in the mask means to
76 // disable the evaluation of corresponding chroma intra mode. The table
77 // av1_derived_chroma_intra_mode_used_flag is used when speed feature
78 // prune_chroma_modes_using_luma_winner is enabled. The evaluated chroma
79 // intra modes are union of the following:
80 // 1) UV_DC_PRED
81 // 2) UV_SMOOTH_PRED
82 // 3) UV_CFL_PRED
83 // 4) mode that corresponds to luma intra mode winner (Eg : UV_V_PRED if luma
84 // intra mode winner is V_PRED).
85 static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = {
86   0x2201,  // DC_PRED:           0010 0010 0000 0001
87   0x2203,  // V_PRED:            0010 0010 0000 0011
88   0x2205,  // H_PRED:            0010 0010 0000 0101
89   0x2209,  // D45_PRED:          0010 0010 0000 1001
90   0x2211,  // D135_PRED:         0010 0010 0001 0001
91   0x2221,  // D113_PRED:         0010 0010 0010 0001
92   0x2241,  // D157_PRED:         0010 0010 0100 0001
93   0x2281,  // D203_PRED:         0010 0010 1000 0001
94   0x2301,  // D67_PRED:          0010 0011 0000 0001
95   0x2201,  // SMOOTH_PRED:       0010 0010 0000 0001
96   0x2601,  // SMOOTH_V_PRED:     0010 0110 0000 0001
97   0x2a01,  // SMOOTH_H_PRED:     0010 1010 0000 0001
98   0x3201   // PAETH_PRED:        0011 0010 0000 0001
99 };
100 
101 DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 };
102 DECLARE_ALIGNED(16, static const uint16_t,
103                 highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
104 
// Evaluates the variance function 'vf' on the block at 'buf' (row pitch
// 'stride') against an all-zero reference block whose stride is 0, and returns
// the value produced by 'vf'. The sse output of 'vf' is discarded.
// 'is_hbd' selects the 16-bit zero reference (wrapped with
// CONVERT_TO_BYTEPTR) instead of the 8-bit one.
int av1_calc_normalized_variance(aom_variance_fn_t vf, const uint8_t *const buf,
                                 const int stride, const int is_hbd) {
  unsigned int sse;

  if (is_hbd)
    return vf(buf, stride, CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse);
  else
    return vf(buf, stride, all_zeros, 0, &sse);
}
114 
115 // Computes average of log(1 + variance) across 4x4 sub-blocks for source and
116 // reconstructed blocks.
compute_avg_log_variance(const AV1_COMP * const cpi,MACROBLOCK * x,const BLOCK_SIZE bs,double * avg_log_src_variance,double * avg_log_recon_variance)117 static void compute_avg_log_variance(const AV1_COMP *const cpi, MACROBLOCK *x,
118                                      const BLOCK_SIZE bs,
119                                      double *avg_log_src_variance,
120                                      double *avg_log_recon_variance) {
121   const MACROBLOCKD *const xd = &x->e_mbd;
122   const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
123   const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1);
124   const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1);
125   const int right_overflow =
126       (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
127   const int bottom_overflow =
128       (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
129   const int bw = (MI_SIZE * mi_size_wide[bs] - right_overflow);
130   const int bh = (MI_SIZE * mi_size_high[bs] - bottom_overflow);
131   const int is_hbd = is_cur_buf_hbd(xd);
132 
133   aom_variance_fn_t vf = cpi->ppi->fn_ptr[BLOCK_4X4].vf;
134   for (int i = 0; i < bh; i += MI_SIZE) {
135     const int r = mi_row_in_sb + (i >> MI_SIZE_LOG2);
136     for (int j = 0; j < bw; j += MI_SIZE) {
137       const int c = mi_col_in_sb + (j >> MI_SIZE_LOG2);
138       const int mi_offset = r * mi_size_wide[sb_size] + c;
139       Block4x4VarInfo *block_4x4_var_info =
140           &x->src_var_info_of_4x4_sub_blocks[mi_offset];
141       int src_var = block_4x4_var_info->var;
142       double log_src_var = block_4x4_var_info->log_var;
143       // Compute average of log(1 + variance) for the source block from 4x4
144       // sub-block variance values. Calculate and store 4x4 sub-block variance
145       // and log(1 + variance), if the values present in
146       // src_var_of_4x4_sub_blocks are invalid. Reuse the same if it is readily
147       // available with valid values.
148       if (src_var < 0) {
149         src_var = av1_calc_normalized_variance(
150             vf, x->plane[0].src.buf + i * x->plane[0].src.stride + j,
151             x->plane[0].src.stride, is_hbd);
152         block_4x4_var_info->var = src_var;
153         log_src_var = log1p(src_var / 16.0);
154         block_4x4_var_info->log_var = log_src_var;
155       } else {
156         // When source variance is already calculated and available for
157         // retrieval, check if log(1 + variance) is also available. If it is
158         // available, then retrieve from buffer. Else, calculate the same and
159         // store to the buffer.
160         if (log_src_var < 0) {
161           log_src_var = log1p(src_var / 16.0);
162           block_4x4_var_info->log_var = log_src_var;
163         }
164       }
165       *avg_log_src_variance += log_src_var;
166 
167       const int recon_var = av1_calc_normalized_variance(
168           vf, xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j,
169           xd->plane[0].dst.stride, is_hbd);
170       *avg_log_recon_variance += log1p(recon_var / 16.0);
171     }
172   }
173 
174   const int blocks = (bw * bh) / 16;
175   *avg_log_src_variance /= (double)blocks;
176   *avg_log_recon_variance /= (double)blocks;
177 }
178 
179 // Returns a factor to be applied to the RD value based on how well the
180 // reconstructed block variance matches the source variance.
intra_rd_variance_factor(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bs)181 static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x,
182                                        BLOCK_SIZE bs) {
183   double threshold = INTRA_RD_VAR_THRESH(cpi->oxcf.speed);
184   // For non-positive threshold values, the comparison of source and
185   // reconstructed variances with threshold evaluates to false
186   // (src_var < threshold/rec_var < threshold) as these metrics are greater than
187   // than 0. Hence further calculations are skipped.
188   if (threshold <= 0) return 1.0;
189 
190   double variance_rd_factor = 1.0;
191   double avg_log_src_variance = 0.0;
192   double avg_log_recon_variance = 0.0;
193   double var_diff = 0.0;
194 
195   compute_avg_log_variance(cpi, x, bs, &avg_log_src_variance,
196                            &avg_log_recon_variance);
197 
198   // Dont allow 0 to prevent / 0 below.
199   avg_log_src_variance += 0.000001;
200   avg_log_recon_variance += 0.000001;
201 
202   if (avg_log_src_variance >= avg_log_recon_variance) {
203     var_diff = (avg_log_src_variance - avg_log_recon_variance);
204     if ((var_diff > 0.5) && (avg_log_recon_variance < threshold)) {
205       variance_rd_factor = 1.0 + ((var_diff * 2) / avg_log_src_variance);
206     }
207   } else {
208     var_diff = (avg_log_recon_variance - avg_log_src_variance);
209     if ((var_diff > 0.5) && (avg_log_src_variance < threshold)) {
210       variance_rd_factor = 1.0 + (var_diff / (2 * avg_log_src_variance));
211     }
212   }
213 
214   // Limit adjustment;
215   variance_rd_factor = AOMMIN(3.0, variance_rd_factor);
216 
217   return variance_rd_factor;
218 }
219 /*!\endcond */
220 
221 /*!\brief Search for the best filter_intra mode when coding intra frame.
222  *
223  * \ingroup intra_mode_search
224  * \callergraph
225  * This function loops through all filter_intra modes to find the best one.
226  *
227  * \return Returns 1 if a new filter_intra mode is selected; 0 otherwise.
228  */
rd_pick_filter_intra_sby(const AV1_COMP * const cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,BLOCK_SIZE bsize,int mode_cost,PREDICTION_MODE best_mode_so_far,int64_t * best_rd,int64_t * best_model_rd,PICK_MODE_CONTEXT * ctx)229 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
230                                     int *rate, int *rate_tokenonly,
231                                     int64_t *distortion, uint8_t *skippable,
232                                     BLOCK_SIZE bsize, int mode_cost,
233                                     PREDICTION_MODE best_mode_so_far,
234                                     int64_t *best_rd, int64_t *best_model_rd,
235                                     PICK_MODE_CONTEXT *ctx) {
236   // Skip the evaluation of filter intra modes.
237   if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0;
238 
239   MACROBLOCKD *const xd = &x->e_mbd;
240   MB_MODE_INFO *mbmi = xd->mi[0];
241   int filter_intra_selected_flag = 0;
242   FILTER_INTRA_MODE mode;
243   TX_SIZE best_tx_size = TX_8X8;
244   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
245   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
246   av1_zero(filter_intra_mode_info);
247   mbmi->filter_intra_mode_info.use_filter_intra = 1;
248   mbmi->mode = DC_PRED;
249   mbmi->palette_mode_info.palette_size[0] = 0;
250 
251   // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have
252   // filter-intra as winner.
253   if (x->use_mb_mode_cache &&
254       !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra)
255     return 0;
256 
257   for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
258     int64_t this_rd;
259     RD_STATS tokenonly_rd_stats;
260     mbmi->filter_intra_mode_info.filter_intra_mode = mode;
261 
262     if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) &&
263         !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] &
264           (1 << mode)))
265       continue;
266 
267     // Skip the evaluation of modes that do not match with the winner mode in
268     // x->mb_mode_cache.
269     if (x->use_mb_mode_cache &&
270         mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode)
271       continue;
272 
273     if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
274       continue;
275     }
276     av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
277                                       *best_rd);
278     if (tokenonly_rd_stats.rate == INT_MAX) continue;
279     const int this_rate =
280         tokenonly_rd_stats.rate +
281         intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
282     this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
283 
284     // Visual quality adjustment based on recon vs source variance.
285     if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
286       this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
287     }
288 
289     // Collect mode stats for multiwinner mode processing
290     const int txfm_search_done = 1;
291     store_winner_mode_stats(
292         &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
293         cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
294     if (this_rd < *best_rd) {
295       *best_rd = this_rd;
296       best_tx_size = mbmi->tx_size;
297       filter_intra_mode_info = mbmi->filter_intra_mode_info;
298       av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
299       memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
300              sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
301       *rate = this_rate;
302       *rate_tokenonly = tokenonly_rd_stats.rate;
303       *distortion = tokenonly_rd_stats.dist;
304       *skippable = tokenonly_rd_stats.skip_txfm;
305       filter_intra_selected_flag = 1;
306     }
307   }
308 
309   if (filter_intra_selected_flag) {
310     mbmi->mode = DC_PRED;
311     mbmi->tx_size = best_tx_size;
312     mbmi->filter_intra_mode_info = filter_intra_mode_info;
313     av1_copy_array(ctx->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
314     return 1;
315   } else {
316     return 0;
317   }
318 }
319 
// Builds a histogram of the 8-bit pixel values in a rows x cols block.
//
// src        Top-left pixel of the block.
// stride     Distance, in pixels, between the starts of consecutive rows.
// val_count  Output histogram; must have room for 256 entries. It is cleared
//            here before counting.
// num_colors Output; number of distinct pixel values found in the block.
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                      int *val_count, int *num_colors) {
  const int max_pix_val = 1 << 8;
  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int this_val = src[r * stride + c];
      assert(this_val < max_pix_val);
      ++val_count[this_val];
    }
  }
  // A color is "used" when its histogram bin is non-zero.
  int n = 0;
  for (int i = 0; i < max_pix_val; ++i) {
    if (val_count[i]) ++n;
  }
  *num_colors = n;
}
337 
// High bit-depth counterpart of av1_count_colors().
//
// src8           Block pixels; converted with CONVERT_TO_SHORTPTR and read as
//                16-bit samples.
// stride         Distance, in pixels, between the starts of consecutive rows.
// bit_depth      Sample bit depth; must be in [8, 12].
// val_count      Optional (may be NULL) histogram over the full-precision
//                pixel values; needs (1 << bit_depth) entries.
// bin_val_count  Histogram over the pixel values down-converted to 8 bits;
//                needs 256 entries.
// num_color_bins Output; number of non-empty 8-bit bins.
// num_colors     Output; number of distinct full-precision values. Written
//                only when val_count is not NULL.
void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
                             int cols, int bit_depth, int *val_count,
                             int *bin_val_count, int *num_color_bins,
                             int *num_colors) {
  // The down-conversion below shifts by (bit_depth - 8); a depth below 8
  // would make the shift count negative, which is undefined behavior.
  assert(bit_depth >= 8 && bit_depth <= 12);
  const int max_bin_val = 1 << 8;
  const int max_pix_val = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  memset(bin_val_count, 0, max_bin_val * sizeof(bin_val_count[0]));
  if (val_count != NULL)
    memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      /*
       * Down-convert the pixels to 8-bit domain before counting.
       * This provides consistency of behavior for palette search
       * between lbd and hbd encodes. This down-converted pixels
       * are only used for calculating the threshold (n).
       */
      const int pix = src[r * stride + c];
      const int this_val = pix >> (bit_depth - 8);
      assert(this_val < max_bin_val);
      // Release builds skip samples that exceed the declared bit depth
      // instead of indexing out of bounds.
      if (this_val >= max_bin_val) continue;
      ++bin_val_count[this_val];
      if (val_count != NULL) ++val_count[pix];
    }
  }
  int n = 0;
  // Count the colors based on 8-bit domain used to gate the palette path
  for (int i = 0; i < max_bin_val; ++i) {
    if (bin_val_count[i]) ++n;
  }
  *num_color_bins = n;

  // Count the actual hbd colors used to create top_colors
  n = 0;
  if (val_count != NULL) {
    for (int i = 0; i < max_pix_val; ++i) {
      if (val_count[i]) ++n;
    }
    *num_colors = n;
  }
}
380 
// Translates a linear search index into a luma prediction mode and delta
// angle, and stores both in 'mbmi'.
//
// Indices below INTRA_MODE_END select the mode from
// intra_rd_search_mode_order with a delta angle of 0. Larger indices
// enumerate the non-zero delta angles: each consecutive group of
// 2 * MAX_ANGLE_DELTA indices belongs to one mode, starting at V_PRED.
//
// Within a group, the delta angle comes from luma_delta_angles_order when
// 'reorder_delta_angle_eval' is non-zero; otherwise the deltas are visited in
// ascending order with 0 skipped (-3, -2, -1, 1, 2, 3).
void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi,
                                int reorder_delta_angle_eval) {
  if (mode_idx < INTRA_MODE_END) {
    mbmi->mode = intra_rd_search_mode_order[mode_idx];
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
  } else {
    mbmi->mode = (mode_idx - INTRA_MODE_END) / (MAX_ANGLE_DELTA * 2) + V_PRED;
    int delta_angle_eval_idx =
        (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2);
    if (reorder_delta_angle_eval) {
      mbmi->angle_delta[PLANE_TYPE_Y] =
          luma_delta_angles_order[delta_angle_eval_idx];
    } else {
      // Map 0..5 to -3..-1 and 1..3 (delta angle 0 is handled above).
      mbmi->angle_delta[PLANE_TYPE_Y] =
          (delta_angle_eval_idx < 3 ? (delta_angle_eval_idx - 3)
                                    : (delta_angle_eval_idx - 2));
    }
  }
}
400 
get_model_rd_index_for_pruning(const MACROBLOCK * const x,const INTRA_MODE_SPEED_FEATURES * const intra_sf)401 static inline int get_model_rd_index_for_pruning(
402     const MACROBLOCK *const x,
403     const INTRA_MODE_SPEED_FEATURES *const intra_sf) {
404   const int top_intra_model_count_allowed =
405       intra_sf->top_intra_model_count_allowed;
406   if (!intra_sf->adapt_top_model_rd_count_using_neighbors)
407     return top_intra_model_count_allowed - 1;
408 
409   const MACROBLOCKD *const xd = &x->e_mbd;
410   const PREDICTION_MODE mode = xd->mi[0]->mode;
411   int model_rd_index_for_pruning = top_intra_model_count_allowed - 1;
412   int is_left_mode_neq_cur_mode = 0, is_above_mode_neq_cur_mode = 0;
413   if (xd->left_available)
414     is_left_mode_neq_cur_mode = xd->left_mbmi->mode != mode;
415   if (xd->up_available)
416     is_above_mode_neq_cur_mode = xd->above_mbmi->mode != mode;
417   // The pruning of luma intra modes is made more aggressive at lower quantizers
418   // and vice versa. The value for model_rd_index_for_pruning is derived as
419   // follows.
420   // qidx 0 to 127: Reduce the index of a candidate used for comparison only if
421   // the current mode does not match either of the available neighboring modes.
422   // qidx 128 to 255: Reduce the index of a candidate used for comparison only
423   // if the current mode does not match both the available neighboring modes.
424   if (x->qindex <= 127) {
425     if (is_left_mode_neq_cur_mode || is_above_mode_neq_cur_mode)
426       model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
427   } else {
428     if (is_left_mode_neq_cur_mode && is_above_mode_neq_cur_mode)
429       model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0);
430   }
431   return model_rd_index_for_pruning;
432 }
433 
/*! \brief prune luma intra mode based on the model rd.
 *
 * The current model rd is first inserted into the list of top model rd values
 * (the list stays sorted in ascending order as long as it was sorted on
 * entry; the largest entry drops out). The mode is then pruned when its model
 * rd is larger than the list entry selected by \c model_rd_index_for_pruning,
 * or larger than 1.5 times the best model rd.
 *
 * \param[in]     this_model_rd              model rd for current mode.
 * \param[in,out] best_model_rd              Best model RD seen for this block
 *                                           so far. Updated when the current
 *                                           mode is not pruned and improves
 *                                           on it.
 * \param[in,out] top_intra_model_rd         Top intra model RD seen for this
 *                                           block so far. Updated even when
 *                                           the mode ends up being pruned.
 * \param[in]     max_model_cnt_allowed      The maximum number of top intra
 *                                           model RD allowed.
 * \param[in]     model_rd_index_for_pruning Index of the candidate used for
 *                                           pruning based on model rd.
 *
 * \return Returns 1 if the current mode should be pruned; 0 otherwise.
 */
static int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
                              int64_t top_intra_model_rd[],
                              int max_model_cnt_allowed,
                              int model_rd_index_for_pruning) {
  const double thresh_best = 1.50;
  const double thresh_top = 1.00;
  // Insert this_model_rd in front of the first entry it beats, shifting the
  // remaining entries down by one.
  for (int i = 0; i < max_model_cnt_allowed; i++) {
    if (this_model_rd < top_intra_model_rd[i]) {
      for (int j = max_model_cnt_allowed - 1; j > i; j--) {
        top_intra_model_rd[j] = top_intra_model_rd[j - 1];
      }
      top_intra_model_rd[i] = this_model_rd;
      break;
    }
  }
  // INT64_MAX marks a list slot that has not been filled yet; such a slot
  // cannot be used as a pruning reference.
  if (top_intra_model_rd[model_rd_index_for_pruning] != INT64_MAX &&
      this_model_rd >
          thresh_top * top_intra_model_rd[model_rd_index_for_pruning])
    return 1;

  if (this_model_rd != INT64_MAX &&
      this_model_rd > thresh_best * (*best_model_rd))
    return 1;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  return 0;
}
471 
472 // Run RD calculation with given chroma intra prediction angle., and return
473 // the RD cost. Update the best mode info. if the RD cost is the best so far.
pick_intra_angle_routine_sbuv(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int rate_overhead,int64_t best_rd_in,int * rate,RD_STATS * rd_stats,int * best_angle_delta,int64_t * best_rd)474 static int64_t pick_intra_angle_routine_sbuv(
475     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
476     int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
477     int *best_angle_delta, int64_t *best_rd) {
478   MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
479   assert(!is_inter_block(mbmi));
480   int this_rate;
481   int64_t this_rd;
482   RD_STATS tokenonly_rd_stats;
483 
484   if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
485     return INT64_MAX;
486   this_rate = tokenonly_rd_stats.rate +
487               intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
488   this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
489   if (this_rd < *best_rd) {
490     *best_rd = this_rd;
491     *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
492     *rate = this_rate;
493     rd_stats->rate = tokenonly_rd_stats.rate;
494     rd_stats->dist = tokenonly_rd_stats.dist;
495     rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm;
496   }
497   return this_rd;
498 }
499 
500 /*!\brief Search for the best angle delta for chroma prediction
501  *
502  * \ingroup intra_mode_search
503  * \callergraph
504  * Given a chroma directional intra prediction mode, this function will try to
505  * estimate the best delta_angle.
506  *
507  * \returns Return if there is a new mode with smaller rdcost than best_rd.
508  */
rd_pick_intra_angle_sbuv(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int rate_overhead,int64_t best_rd,int * rate,RD_STATS * rd_stats)509 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
510                                     BLOCK_SIZE bsize, int rate_overhead,
511                                     int64_t best_rd, int *rate,
512                                     RD_STATS *rd_stats) {
513   MACROBLOCKD *const xd = &x->e_mbd;
514   MB_MODE_INFO *mbmi = xd->mi[0];
515   assert(!is_inter_block(mbmi));
516   int i, angle_delta, best_angle_delta = 0;
517   int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
518 
519   rd_stats->rate = INT_MAX;
520   rd_stats->skip_txfm = 0;
521   rd_stats->dist = INT64_MAX;
522   for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
523 
524   for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
525     for (i = 0; i < 2; ++i) {
526       best_rd_in = (best_rd == INT64_MAX)
527                        ? INT64_MAX
528                        : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
529       mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
530       this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
531                                               best_rd_in, rate, rd_stats,
532                                               &best_angle_delta, &best_rd);
533       rd_cost[2 * angle_delta + i] = this_rd;
534       if (angle_delta == 0) {
535         if (this_rd == INT64_MAX) return 0;
536         rd_cost[1] = this_rd;
537         break;
538       }
539     }
540   }
541 
542   assert(best_rd != INT64_MAX);
543   for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
544     int64_t rd_thresh;
545     for (i = 0; i < 2; ++i) {
546       int skip_search = 0;
547       rd_thresh = best_rd + (best_rd >> 5);
548       if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
549           rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
550         skip_search = 1;
551       if (!skip_search) {
552         mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta;
553         pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
554                                       rate, rd_stats, &best_angle_delta,
555                                       &best_rd);
556       }
557     }
558   }
559 
560   mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta;
561   return rd_stats->rate != INT_MAX;
562 }
563 
// Builds the joint CfL sign index from the sign 'a' of the plane being
// evaluated and the sign 'b' of the other plane. When 'plane' is CFL_PRED_U,
// 'a' is the more significant (U) sign; otherwise the roles are swapped.
// All arguments are parenthesized so that expression arguments expand with
// the intended precedence.
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b)        \
  ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1 \
                         : (b) * CFL_SIGNS + (a) - 1)
566 
// Splits a linear CfL index into a sign and an alpha magnitude index.
// CFL_INDEX_ZERO maps to (CFL_SIGN_ZERO, 0). Indices above it map to
// CFL_SIGN_POS and indices below it to CFL_SIGN_NEG, with the alpha index
// being the distance from CFL_INDEX_ZERO minus one.
static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign,
                                      int *cfl_alpha) {
  int cfl_linear_idx = cfl_idx - CFL_INDEX_ZERO;
  if (cfl_linear_idx == 0) {
    *cfl_sign = CFL_SIGN_ZERO;
    *cfl_alpha = 0;
  } else {
    *cfl_sign = cfl_linear_idx > 0 ? CFL_SIGN_POS : CFL_SIGN_NEG;
    *cfl_alpha = abs(cfl_linear_idx) - 1;
  }
}
578 
// Computes the cost of coding 'plane' with the CfL parameter 'cfl_idx'.
//
// In fast mode the cost is the estimate returned by intra_model_rd() and
// 'rd_stats' must be NULL. Otherwise a transform search is run for the plane,
// 'rd_stats' is filled in and its rdcost is returned.
//
// The block's cfl_alpha_signs / cfl_alpha_idx are overwritten for the
// duration of the evaluation and restored before returning.
static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size,
                              BLOCK_SIZE plane_bsize, int cfl_idx,
                              int fast_mode, RD_STATS *rd_stats) {
  assert(IMPLIES(fast_mode, rd_stats == NULL));
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  int cfl_plane = get_cfl_pred_type(plane);
  CFL_SIGN_TYPE cfl_sign;
  int cfl_alpha;
  cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha);
  // We only build CFL for a given plane, the other plane's sign is dummy
  int dummy_sign = CFL_SIGN_NEG;
  const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs;
  const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx;
  mbmi->cfl_alpha_signs =
      PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign);
  // The same alpha is written to both halves of the packed index.
  mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha;
  int64_t cfl_cost;
  if (fast_mode) {
    cfl_cost =
        intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0);
  } else {
    av1_init_rd_stats(rd_stats);
    av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize,
                         tx_size, FTXS_NONE, 0);
    av1_rd_cost_update(x->rdmult, rd_stats);
    cfl_cost = rd_stats->rdcost;
  }
  mbmi->cfl_alpha_signs = orig_cfl_alpha_signs;
  mbmi->cfl_alpha_idx = orig_cfl_alpha_idx;
  return cfl_cost;
}
613 
// Search directions, relative to the starting CfL index, used by the CfL
// parameter searches: first towards larger indices, then towards smaller ones.
static const int cfl_dir_ls[2] = { 1, -1 };
615 
616 // If cfl_search_range is CFL_MAGS_SIZE, return zero. Otherwise return the index
617 // of the best alpha found using intra_model_rd().
cfl_pick_plane_parameter(const AV1_COMP * const cpi,MACROBLOCK * x,int plane,TX_SIZE tx_size,int cfl_search_range)618 static int cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
619                                     int plane, TX_SIZE tx_size,
620                                     int cfl_search_range) {
621   assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
622 
623   if (cfl_search_range == CFL_MAGS_SIZE) return CFL_INDEX_ZERO;
624 
625   const MACROBLOCKD *const xd = &x->e_mbd;
626   const MB_MODE_INFO *const mbmi = xd->mi[0];
627   assert(mbmi->uv_mode == UV_CFL_PRED);
628   const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
629   const BLOCK_SIZE plane_bsize =
630       get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
631 
632   int est_best_cfl_idx = CFL_INDEX_ZERO;
633   int fast_mode = 1;
634   int start_cfl_idx = CFL_INDEX_ZERO;
635   int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
636                                          start_cfl_idx, fast_mode, NULL);
637   for (int si = 0; si < 2; ++si) {
638     const int dir = cfl_dir_ls[si];
639     for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
640       int cfl_idx = start_cfl_idx + dir * i;
641       if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
642       int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
643                                         cfl_idx, fast_mode, NULL);
644       if (cfl_cost < best_cfl_cost) {
645         best_cfl_cost = cfl_cost;
646         est_best_cfl_idx = cfl_idx;
647       } else {
648         break;
649       }
650     }
651   }
652   return est_best_cfl_idx;
653 }
654 
// Resets both CfL output parameters to 0, the value used when no valid CfL
// parameters are available.
static inline void set_invalid_cfl_parameters(uint8_t *best_cfl_alpha_idx,
                                              int8_t *best_cfl_alpha_signs) {
  *best_cfl_alpha_idx = 0;
  *best_cfl_alpha_signs = 0;
}
660 
// Runs the full (non-fast) RD evaluation of the CfL parameters around
// 'est_best_cfl_idx' for one plane.
//
// Every entry of 'cfl_rd_arr' is first marked invalid. The entry at
// est_best_cfl_idx is then evaluated, followed by up to
// (cfl_search_range - 1) further indices in each direction of cfl_dir_ls that
// fall inside [0, CFL_MAGS_SIZE). Entries that are not evaluated stay invalid.
static void cfl_pick_plane_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                              int plane, TX_SIZE tx_size, int cfl_search_range,
                              RD_STATS cfl_rd_arr[CFL_MAGS_SIZE],
                              int est_best_cfl_idx) {
  assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->uv_mode == UV_CFL_PRED);
  const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);

  for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) {
    av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]);
  }

  int fast_mode = 0;
  int start_cfl_idx = est_best_cfl_idx;
  cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode,
                 &cfl_rd_arr[start_cfl_idx]);

  // A search range of 1 means only the estimated index itself is evaluated.
  if (cfl_search_range == 1) return;

  for (int si = 0; si < 2; ++si) {
    const int dir = cfl_dir_ls[si];
    for (int i = 1; i < cfl_search_range; ++i) {
      int cfl_idx = start_cfl_idx + dir * i;
      if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
      cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode,
                     &cfl_rd_arr[cfl_idx]);
    }
  }
}
694 
695 /*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
696  *
697  * \ingroup intra_mode_search
698  * \callergraph
699  *
700  * This function will use DCT_DCT followed by computing SATD (sum of absolute
701  * transformed differences) to estimate the RD score and find the best possible
702  * CFL parameter.
703  *
704  * Then the function will apply a full RD search near the best possible CFL
705  * parameter to find the best actual CFL parameter.
706  *
707  * Side effect:
708  * We use ths buffers in x->plane[] and xd->plane[] as throw-away buffers for RD
709  * search.
710  *
711  * \param[in] x                Encoder prediction block structure.
712  * \param[in] cpi              Top-level encoder instance structure.
713  * \param[in] tx_size          Transform size.
714  * \param[in] ref_best_rd      Reference best RD.
715  * \param[in] cfl_search_range The search range of full RD search near the
716  *                             estimated best CFL parameter.
717  *
718  * \param[out]   best_rd_stats          RD stats of the best CFL parameter
719  * \param[out]   best_cfl_alpha_idx     Best CFL alpha index
720  * \param[out]   best_cfl_alpha_signs   Best CFL joint signs
721  *
722  */
cfl_rd_pick_alpha(MACROBLOCK * const x,const AV1_COMP * const cpi,TX_SIZE tx_size,int64_t ref_best_rd,int cfl_search_range,RD_STATS * best_rd_stats,uint8_t * best_cfl_alpha_idx,int8_t * best_cfl_alpha_signs)723 static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
724                              TX_SIZE tx_size, int64_t ref_best_rd,
725                              int cfl_search_range, RD_STATS *best_rd_stats,
726                              uint8_t *best_cfl_alpha_idx,
727                              int8_t *best_cfl_alpha_signs) {
728   assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
729   const ModeCosts *mode_costs = &x->mode_costs;
730   RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE];
731   RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE];
732   MACROBLOCKD *const xd = &x->e_mbd;
733   int est_best_cfl_idx_u, est_best_cfl_idx_v;
734 
735   av1_invalid_rd_stats(best_rd_stats);
736 
737   // As the dc pred data is same for different values of alpha, enable the
738   // caching of dc pred data. Call clear_cfl_dc_pred_cache_flags() before
739   // returning to avoid the unintentional usage of cached dc pred data.
740   xd->cfl.use_dc_pred_cache = true;
741   // Evaluate alpha parameter of each chroma plane.
742   est_best_cfl_idx_u =
743       cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range);
744   est_best_cfl_idx_v =
745       cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range);
746 
747   if (cfl_search_range == 1) {
748     // For cfl_search_range=1, further refinement of alpha is not enabled. Hence
749     // CfL index=0 for both the chroma planes implies invalid CfL mode.
750     if (est_best_cfl_idx_u == CFL_INDEX_ZERO &&
751         est_best_cfl_idx_v == CFL_INDEX_ZERO) {
752       set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
753       clear_cfl_dc_pred_cache_flags(&xd->cfl);
754       return 0;
755     }
756 
757     int cfl_alpha_u, cfl_alpha_v;
758     CFL_SIGN_TYPE cfl_sign_u, cfl_sign_v;
759     const MB_MODE_INFO *mbmi = xd->mi[0];
760     cfl_idx_to_sign_and_alpha(est_best_cfl_idx_u, &cfl_sign_u, &cfl_alpha_u);
761     cfl_idx_to_sign_and_alpha(est_best_cfl_idx_v, &cfl_sign_v, &cfl_alpha_v);
762     const int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
763     // Compute alpha and mode signaling rate.
764     const int rate_overhead =
765         mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u] +
766         mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v] +
767         mode_costs
768             ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_CFL_PRED];
769     // Skip the CfL mode evaluation if the RD cost derived using the rate needed
770     // to signal the CfL mode and alpha parameter exceeds the ref_best_rd.
771     if (RDCOST(x->rdmult, rate_overhead, 0) > ref_best_rd) {
772       set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
773       clear_cfl_dc_pred_cache_flags(&xd->cfl);
774       return 0;
775     }
776   }
777 
778   // Compute the rd cost of each chroma plane using the alpha parameters which
779   // were already evaluated.
780   cfl_pick_plane_rd(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u,
781                     est_best_cfl_idx_u);
782   cfl_pick_plane_rd(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v,
783                     est_best_cfl_idx_v);
784 
785   clear_cfl_dc_pred_cache_flags(&xd->cfl);
786 
787   for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) {
788     if (cfl_rd_arr_u[ui].rate == INT_MAX) continue;
789     int cfl_alpha_u;
790     CFL_SIGN_TYPE cfl_sign_u;
791     cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u);
792     for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) {
793       if (cfl_rd_arr_v[vi].rate == INT_MAX) continue;
794       int cfl_alpha_v;
795       CFL_SIGN_TYPE cfl_sign_v;
796       cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v);
797       // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a
798       // valid parameter for CFL
799       if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue;
800       int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
801       RD_STATS rd_stats = cfl_rd_arr_u[ui];
802       av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]);
803       if (rd_stats.rate != INT_MAX) {
804         rd_stats.rate +=
805             mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u];
806         rd_stats.rate +=
807             mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v];
808       }
809       av1_rd_cost_update(x->rdmult, &rd_stats);
810       if (rd_stats.rdcost < best_rd_stats->rdcost) {
811         *best_rd_stats = rd_stats;
812         *best_cfl_alpha_idx =
813             (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v;
814         *best_cfl_alpha_signs = joint_sign;
815       }
816     }
817   }
818   if (best_rd_stats->rdcost >= ref_best_rd) {
819     av1_invalid_rd_stats(best_rd_stats);
820     // Set invalid CFL parameters here since the rdcost is not better than
821     // ref_best_rd.
822     set_invalid_cfl_parameters(best_cfl_alpha_idx, best_cfl_alpha_signs);
823     return 0;
824   }
825   return 1;
826 }
827 
// Decides whether UV_SMOOTH_PRED can be skipped for this block.
//
// Returns false when the prune_smooth_intra_mode_for_chroma speed feature is
// off. Otherwise returns true only if the per-pixel source variance of every
// chroma plane is below 20 (an empirically derived threshold).
static bool should_prune_chroma_smooth_pred_based_on_source_variance(
    const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize) {
  if (!cpi->sf.intra_sf.prune_smooth_intra_mode_for_chroma) return false;

  const unsigned int variance_thresh = 20;
  const int num_planes = av1_num_planes(&cpi->common);
  int plane = AOM_PLANE_U;
  while (plane < num_planes) {
    const unsigned int plane_variance = av1_get_perpixel_variance_facade(
        cpi, &x->e_mbd, &x->plane[plane].src, bsize, plane);
    // A single plane at or above the threshold keeps the mode in the search.
    if (plane_variance >= variance_thresh) return false;
    ++plane;
  }
  return true;
}
841 
// Chooses the chroma (UV) intra prediction mode for the current block.
//
// Each entry of uv_rd_search_mode_order is filtered through the encoder
// configuration and speed-feature checks below. A surviving mode is evaluated
// with the routine matching its kind: CfL alpha search, angle-delta search, or
// a direct chroma transform search. Palette is tried last when it is allowed.
// The best candidate is written back to xd->mi[0], its statistics are stored
// in *rate, *rate_tokenonly, *distortion and *skippable, and its RD cost is
// returned.
//
// For a block that is not a chroma reference, the outputs are set to rate = 0,
// rate_tokenonly = 0, distortion = 0 and skippable = 1, and INT64_MAX is
// returned.
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, uint8_t *skippable,
                                    BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
  const ModeCosts *const mode_costs = &x->mode_costs;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));

  // Snapshot of the mode info as it was on entry; replaced whenever a better
  // candidate is found and copied back at the end.
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX;

  init_sbuv_mode(mbmi);

  // Nothing to search if the current block does not correspond to a chroma
  // block.
  if (!xd->is_chroma_ref) {
    *rate = 0;
    *rate_tokenonly = 0;
    *distortion = 0;
    *skippable = 1;
    return INT64_MAX;
  }

  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = store_cfl_required_rdo(cm, x);
  if (xd->cfl.store_y) {
    // Restore reconstructed luma values.
    // TODO(chiyotsai@google.com): right now we are re-computing the txfm in
    // this function everytime we search through uv modes. There is some
    // potential speed up here if we cache the result to avoid redundant
    // computation.
    av1_encode_intra_block_plane(cpi, x, mbmi->bsize, AOM_PLANE_Y,
                                 DRY_RUN_NORMAL,
                                 cpi->optimize_seg_arr[mbmi->segment_id]);
    xd->cfl.store_y = 0;
  }

  IntraModeSearchState intra_search_state;
  init_intra_mode_search_state(&intra_search_state);
  const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);

  // Pass 1: every non-palette mode, in uv_rd_search_mode_order.
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    const UV_PREDICTION_MODE uv_mode = uv_rd_search_mode_order[mode_idx];
    RD_STATS tokenonly_rd_stats;
    int this_rate;

    // Drop the mode if the RD cost of just signaling it already exceeds the
    // best RD cost found so far.
    const int mode_rate =
        mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][uv_mode];
    if (RDCOST(x->rdmult, mode_rate, 0) > best_rd) continue;

    const PREDICTION_MODE intra_mode = get_uv_mode(uv_mode);
    const int is_directional_mode = av1_is_directional_mode(intra_mode);

    // Encoder-configuration switches for diagonal / directional modes.
    if (av1_is_diagonal_mode(intra_mode) &&
        !intra_mode_cfg->enable_diagonal_intra)
      continue;
    if (is_directional_mode && !intra_mode_cfg->enable_directional_intra)
      continue;

    // Speed-feature mask of chroma modes allowed for this transform size.
    const int uv_mode_bit = 1 << uv_mode;
    if ((sf->intra_sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
         uv_mode_bit) == 0)
      continue;

    const int is_smooth_mode =
        uv_mode >= UV_SMOOTH_PRED && uv_mode <= UV_SMOOTH_H_PRED;
    if (is_smooth_mode && !intra_mode_cfg->enable_smooth_intra) continue;

    if (uv_mode == UV_PAETH_PRED && !intra_mode_cfg->enable_paeth_intra)
      continue;

    // Optionally restrict the chroma modes to those derived from the luma
    // winner.
    assert(mbmi->mode < INTRA_MODES);
    if (sf->intra_sf.prune_chroma_modes_using_luma_winner &&
        (av1_derived_chroma_intra_mode_used_flag[mbmi->mode] & uv_mode_bit) ==
            0)
      continue;

    // Set up the candidate; angle delta starts from zero for every mode.
    mbmi->uv_mode = uv_mode;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;

    if (uv_mode == UV_CFL_PRED) {
      if (!(cfl_allowed && intra_mode_cfg->enable_cfl_intra)) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      const int cfl_found = cfl_rd_pick_alpha(
          x, cpi, uv_tx_size, best_rd, sf->intra_sf.cfl_search_range,
          &tokenonly_rd_stats, &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs);
      if (!cfl_found) continue;
    } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
               intra_mode_cfg->enable_angle_delta) {
      // Build the HOG-based directional skip mask once, on first use.
      if (sf->intra_sf.chroma_intra_pruning_with_hog &&
          !intra_search_state.dir_mode_skip_mask_ready) {
        static const float thresh[2][4] = {
          { -1.2f, 0.0f, 0.0f, 1.2f },    // Interframe
          { -1.2f, -1.2f, -0.6f, 0.4f },  // Intraframe
        };
        const int is_chroma = 1;
        const int is_intra_frame = frame_is_intra_only(cm);
        prune_intra_mode_with_hog(
            x, bsize, cm->seq_params->sb_size,
            thresh[is_intra_frame]
                  [sf->intra_sf.chroma_intra_pruning_with_hog - 1],
            intra_search_state.directional_mode_skip_mask, is_chroma);
        intra_search_state.dir_mode_skip_mask_ready = 1;
      }
      if (intra_search_state.directional_mode_skip_mask[uv_mode]) continue;

      // Search through angle delta; the mode signaling rate is the overhead.
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, mode_rate, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
      if (uv_mode == UV_SMOOTH_PRED &&
          should_prune_chroma_smooth_pred_based_on_source_variance(cpi, x,
                                                                   bsize))
        continue;

      // No angle delta to search for: evaluate the prediction directly.
      if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) continue;
    }

    // Total rate = token rate + mode-info rate built on the signaling rate.
    this_rate = tokenonly_rd_stats.rate +
                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_rate);
    const int64_t this_rd =
        RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
    if (this_rd >= best_rd) continue;

    best_mbmi = *mbmi;
    best_rd = this_rd;
    *rate = this_rate;
    *rate_tokenonly = tokenonly_rd_stats.rate;
    *distortion = tokenonly_rd_stats.dist;
    *skippable = tokenonly_rd_stats.skip_txfm;
  }

  // Pass 2: palette mode, when enabled and allowed for this block size.
  if (cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                        mbmi->bsize)) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    av1_rd_pick_palette_intra_sbuv(
        cpi, x,
        mode_costs->intra_uv_mode_cost[cfl_allowed][mbmi->mode][UV_DC_PRED],
        best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
        distortion, skippable);
  }

  // Commit the winner and make sure we actually chose a mode.
  *mbmi = best_mbmi;
  assert(best_rd < INT64_MAX);
  return best_rd;
}
1008 
1009 // Searches palette mode for luma channel in inter frame.
av1_search_palette_mode(IntraModeSearchState * intra_search_state,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int ref_frame_cost,PICK_MODE_CONTEXT * ctx,RD_STATS * this_rd_cost,int64_t best_rd)1010 int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
1011                             const AV1_COMP *cpi, MACROBLOCK *x,
1012                             BLOCK_SIZE bsize, unsigned int ref_frame_cost,
1013                             PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost,
1014                             int64_t best_rd) {
1015   const AV1_COMMON *const cm = &cpi->common;
1016   MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
1017   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1018   const int num_planes = av1_num_planes(cm);
1019   MACROBLOCKD *const xd = &x->e_mbd;
1020   int rate2 = 0;
1021   int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd;
1022   int skippable = 0;
1023   uint8_t *const best_palette_color_map =
1024       x->palette_buffer->best_palette_color_map;
1025   uint8_t *const color_map = xd->plane[0].color_index_map;
1026   MB_MODE_INFO best_mbmi_palette = *mbmi;
1027   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1028   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1029   const ModeCosts *mode_costs = &x->mode_costs;
1030   const int *const intra_mode_cost =
1031       mode_costs->mbmode_cost[size_group_lookup[bsize]];
1032   const int rows = block_size_high[bsize];
1033   const int cols = block_size_wide[bsize];
1034 
1035   mbmi->mode = DC_PRED;
1036   mbmi->uv_mode = UV_DC_PRED;
1037   mbmi->ref_frame[0] = INTRA_FRAME;
1038   mbmi->ref_frame[1] = NONE_FRAME;
1039   av1_zero(pmi->palette_size);
1040 
1041   RD_STATS rd_stats_y;
1042   av1_invalid_rd_stats(&rd_stats_y);
1043   av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
1044                                 &best_mbmi_palette, best_palette_color_map,
1045                                 &best_rd_palette, &rd_stats_y.rate, NULL,
1046                                 &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
1047                                 ctx, best_blk_skip, best_tx_type_map);
1048   if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
1049     this_rd_cost->rdcost = INT64_MAX;
1050     return skippable;
1051   }
1052 
1053   memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
1054          sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
1055   av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
1056   memcpy(color_map, best_palette_color_map,
1057          rows * cols * sizeof(best_palette_color_map[0]));
1058 
1059   skippable = rd_stats_y.skip_txfm;
1060   distortion2 = rd_stats_y.dist;
1061   rate2 = rd_stats_y.rate + ref_frame_cost;
1062   if (num_planes > 1) {
1063     if (intra_search_state->rate_uv_intra == INT_MAX) {
1064       // We have not found any good uv mode yet, so we need to search for it.
1065       TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
1066       av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra,
1067                                   &intra_search_state->rate_uv_tokenonly,
1068                                   &intra_search_state->dist_uvs,
1069                                   &intra_search_state->skip_uvs, bsize, uv_tx);
1070       intra_search_state->mode_uv = mbmi->uv_mode;
1071       intra_search_state->pmi_uv = *pmi;
1072       intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];
1073     }
1074 
1075     // We have found at least one good uv mode before, so copy and paste it
1076     // over.
1077     mbmi->uv_mode = intra_search_state->mode_uv;
1078     pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1];
1079     if (pmi->palette_size[1] > 0) {
1080       memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
1081              intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE,
1082              2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
1083     }
1084     mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta;
1085     skippable = skippable && intra_search_state->skip_uvs;
1086     distortion2 += intra_search_state->dist_uvs;
1087     rate2 += intra_search_state->rate_uv_intra;
1088   }
1089 
1090   if (skippable) {
1091     rate2 -= rd_stats_y.rate;
1092     if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly;
1093     rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
1094   } else {
1095     rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
1096   }
1097   this_rd = RDCOST(x->rdmult, rate2, distortion2);
1098   this_rd_cost->rate = rate2;
1099   this_rd_cost->dist = distortion2;
1100   this_rd_cost->rdcost = this_rd;
1101   return skippable;
1102 }
1103 
// Luma-only palette search.
//
// The block is set up as a DC_PRED / UV_DC_PRED intra block and the luma
// palette search is run. If no palette is found, this_rd_cost->rdcost is set
// to INT64_MAX and nothing else is written. Otherwise the winning luma state
// is copied back into x / xd and this_rd_cost receives the rate (including
// ref_frame_cost and the skip-flag cost), distortion, RD cost and skip flag.
void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, unsigned int ref_frame_cost,
                                  PICK_MODE_CONTEXT *ctx,
                                  RD_STATS *this_rd_cost, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const ModeCosts *const mode_costs = &x->mode_costs;
  const int *const intra_mode_cost =
      mode_costs->mbmode_cost[size_group_lookup[bsize]];
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  uint8_t *const color_map = xd->plane[0].color_index_map;
  uint8_t *const best_palette_color_map =
      x->palette_buffer->best_palette_color_map;

  // Scratch copies of the winner produced by the palette search.
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  MB_MODE_INFO best_mbmi_palette = *mbmi;
  int64_t best_rd_palette = best_rd;

  // Palette is searched on top of a DC_PRED intra block.
  mbmi->mode = DC_PRED;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  av1_zero(pmi->palette_size);

  RD_STATS rd_stats_y;
  av1_invalid_rd_stats(&rd_stats_y);
  av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
                                &best_mbmi_palette, best_palette_color_map,
                                &best_rd_palette, &rd_stats_y.rate, NULL,
                                &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
                                ctx, best_blk_skip, best_tx_type_map);
  if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
    // No usable luma palette.
    this_rd_cost->rdcost = INT64_MAX;
    return;
  }

  // Restore the transform-search state and color map of the winner.
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
  memcpy(color_map, best_palette_color_map,
         rows * cols * sizeof(best_palette_color_map[0]));

  const int skip_ctx = av1_get_skip_txfm_context(xd);
  if (rd_stats_y.skip_txfm) {
    // Skipped block: only the reference frame and the skip flag are paid for.
    rd_stats_y.rate = ref_frame_cost + mode_costs->skip_txfm_cost[skip_ctx][1];
  } else {
    rd_stats_y.rate += ref_frame_cost;
    rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  }

  this_rd_cost->rate = rd_stats_y.rate;
  this_rd_cost->dist = rd_stats_y.dist;
  this_rd_cost->rdcost = RDCOST(x->rdmult, rd_stats_y.rate, rd_stats_y.dist);
  this_rd_cost->skip_txfm = rd_stats_y.skip_txfm;
}
1164 
1165 /*!\brief Get the intra prediction by searching through tx_type and tx_size.
1166  *
1167  * \ingroup intra_mode_search
1168  * \callergraph
1169  * Currently this function is only used in the intra frame code path for
1170  * winner-mode processing.
1171  *
1172  * \return Returns whether the current mode is an improvement over best_rd.
1173  */
intra_block_yrd(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,const int * bmode_costs,int64_t * best_rd,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,MB_MODE_INFO * best_mbmi,PICK_MODE_CONTEXT * ctx)1174 static inline int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
1175                                   BLOCK_SIZE bsize, const int *bmode_costs,
1176                                   int64_t *best_rd, int *rate,
1177                                   int *rate_tokenonly, int64_t *distortion,
1178                                   uint8_t *skippable, MB_MODE_INFO *best_mbmi,
1179                                   PICK_MODE_CONTEXT *ctx) {
1180   MACROBLOCKD *const xd = &x->e_mbd;
1181   MB_MODE_INFO *const mbmi = xd->mi[0];
1182   RD_STATS rd_stats;
1183   // In order to improve txfm search, avoid rd based breakouts during winner
1184   // mode evaluation. Hence passing ref_best_rd as INT64_MAX by default when the
1185   // speed feature use_rd_based_breakout_for_intra_tx_search is disabled.
1186   int64_t ref_best_rd = cpi->sf.tx_sf.use_rd_based_breakout_for_intra_tx_search
1187                             ? *best_rd
1188                             : INT64_MAX;
1189   av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, ref_best_rd);
1190   if (rd_stats.rate == INT_MAX) return 0;
1191   int this_rate_tokenonly = rd_stats.rate;
1192   if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
1193     // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
1194     // in the tokenonly rate, but for intra blocks, tx_size is always coded
1195     // (prediction granularity), so we account for it in the full rate,
1196     // not the tokenonly rate.
1197     this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
1198   }
1199   const int this_rate =
1200       rd_stats.rate +
1201       intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0);
1202   const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
1203   if (this_rd < *best_rd) {
1204     *best_mbmi = *mbmi;
1205     *best_rd = this_rd;
1206     *rate = this_rate;
1207     *rate_tokenonly = this_rate_tokenonly;
1208     *distortion = rd_stats.dist;
1209     *skippable = rd_stats.skip_txfm;
1210     av1_copy_array(ctx->blk_skip, x->txfm_search_info.blk_skip,
1211                    ctx->num_4x4_blk);
1212     av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1213     return 1;
1214   }
1215   return 0;
1216 }
1217 
1218 /*!\brief Search for the best filter_intra mode when coding inter frame.
1219  *
1220  * \ingroup intra_mode_search
1221  * \callergraph
1222  * This function loops through all filter_intra modes to find the best one.
1223  *
1224  * \remark Returns nothing, but updates the mbmi and rd_stats.
1225  */
handle_filter_intra_mode(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,const PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats_y,int mode_cost,int64_t best_rd,int64_t best_rd_so_far)1226 static inline void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
1227                                             BLOCK_SIZE bsize,
1228                                             const PICK_MODE_CONTEXT *ctx,
1229                                             RD_STATS *rd_stats_y, int mode_cost,
1230                                             int64_t best_rd,
1231                                             int64_t best_rd_so_far) {
1232   MACROBLOCKD *const xd = &x->e_mbd;
1233   MB_MODE_INFO *const mbmi = xd->mi[0];
1234   assert(mbmi->mode == DC_PRED &&
1235          av1_filter_intra_allowed_bsize(&cpi->common, bsize));
1236 
1237   RD_STATS rd_stats_y_fi;
1238   int filter_intra_selected_flag = 0;
1239   TX_SIZE best_tx_size = mbmi->tx_size;
1240   FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
1241   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1242   memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1243          sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1244   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1245   av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1246   mbmi->filter_intra_mode_info.use_filter_intra = 1;
1247   for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES;
1248        ++fi_mode) {
1249     mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
1250     av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
1251     if (rd_stats_y_fi.rate == INT_MAX) continue;
1252     const int this_rate_tmp =
1253         rd_stats_y_fi.rate +
1254         intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1255     const int64_t this_rd_tmp =
1256         RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
1257 
1258     if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) {
1259       break;
1260     }
1261     if (this_rd_tmp < best_rd_so_far) {
1262       best_tx_size = mbmi->tx_size;
1263       av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1264       memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
1265              sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
1266       best_fi_mode = fi_mode;
1267       *rd_stats_y = rd_stats_y_fi;
1268       filter_intra_selected_flag = 1;
1269       best_rd_so_far = this_rd_tmp;
1270     }
1271   }
1272 
1273   mbmi->tx_size = best_tx_size;
1274   av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
1275   memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
1276          sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1277 
1278   if (filter_intra_selected_flag) {
1279     mbmi->filter_intra_mode_info.use_filter_intra = 1;
1280     mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
1281   } else {
1282     mbmi->filter_intra_mode_info.use_filter_intra = 0;
1283   }
1284 }
1285 
1286 // Evaluate a given luma intra-mode in inter frames.
av1_handle_intra_y_mode(IntraModeSearchState * intra_search_state,const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int ref_frame_cost,const PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats_y,int64_t best_rd,int * mode_cost_y,int64_t * rd_y,int64_t * best_model_rd,int64_t top_intra_model_rd[])1287 int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
1288                             const AV1_COMP *cpi, MACROBLOCK *x,
1289                             BLOCK_SIZE bsize, unsigned int ref_frame_cost,
1290                             const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y,
1291                             int64_t best_rd, int *mode_cost_y, int64_t *rd_y,
1292                             int64_t *best_model_rd,
1293                             int64_t top_intra_model_rd[]) {
1294   const AV1_COMMON *cm = &cpi->common;
1295   const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1296   MACROBLOCKD *const xd = &x->e_mbd;
1297   MB_MODE_INFO *const mbmi = xd->mi[0];
1298   assert(mbmi->ref_frame[0] == INTRA_FRAME);
1299   const PREDICTION_MODE mode = mbmi->mode;
1300   const ModeCosts *mode_costs = &x->mode_costs;
1301   const int mode_cost =
1302       mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost;
1303   const int skip_ctx = av1_get_skip_txfm_context(xd);
1304 
1305   int known_rate = mode_cost;
1306   const int intra_cost_penalty = av1_get_intra_cost_penalty(
1307       cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
1308       cm->seq_params->bit_depth);
1309 
1310   if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty;
1311   known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0],
1312                        mode_costs->skip_txfm_cost[skip_ctx][1]);
1313   const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
1314   if (known_rd > best_rd) {
1315     intra_search_state->skip_intra_modes = 1;
1316     return 0;
1317   }
1318 
1319   const int is_directional_mode = av1_is_directional_mode(mode);
1320   if (is_directional_mode && av1_use_angle_delta(bsize) &&
1321       cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
1322     if (intra_sf->intra_pruning_with_hog &&
1323         !intra_search_state->dir_mode_skip_mask_ready) {
1324       const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f };
1325       const int is_chroma = 0;
1326       prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size,
1327                                 thresh[intra_sf->intra_pruning_with_hog - 1],
1328                                 intra_search_state->directional_mode_skip_mask,
1329                                 is_chroma);
1330       intra_search_state->dir_mode_skip_mask_ready = 1;
1331     }
1332     if (intra_search_state->directional_mode_skip_mask[mode]) return 0;
1333   }
1334   const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1335   const int64_t this_model_rd =
1336       intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1337 
1338   const int model_rd_index_for_pruning =
1339       get_model_rd_index_for_pruning(x, intra_sf);
1340 
1341   if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd,
1342                          intra_sf->top_intra_model_count_allowed,
1343                          model_rd_index_for_pruning))
1344     return 0;
1345   av1_init_rd_stats(rd_stats_y);
1346   av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd);
1347 
1348   // Pick filter intra modes.
1349   if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
1350     int try_filter_intra = 1;
1351     int64_t best_rd_so_far = INT64_MAX;
1352     if (rd_stats_y->rate != INT_MAX) {
1353       // best_rd_so_far is the rdcost of DC_PRED without using filter_intra.
1354       // Later, in filter intra search, best_rd_so_far is used for comparison.
1355       mbmi->filter_intra_mode_info.use_filter_intra = 0;
1356       const int tmp_rate =
1357           rd_stats_y->rate +
1358           intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1359       best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
1360       try_filter_intra = (best_rd_so_far / 2) <= best_rd;
1361     } else if (intra_sf->skip_filter_intra_in_inter_frames >= 1) {
1362       // As rd cost of luma intra dc mode is more than best_rd (i.e.,
1363       // rd_stats_y->rate = INT_MAX), skip the evaluation of filter intra modes.
1364       try_filter_intra = 0;
1365     }
1366 
1367     if (try_filter_intra) {
1368       handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost,
1369                                best_rd, best_rd_so_far);
1370     }
1371   }
1372 
1373   if (rd_stats_y->rate == INT_MAX) return 0;
1374 
1375   *mode_cost_y = intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost, 0);
1376   const int rate_y = rd_stats_y->skip_txfm
1377                          ? mode_costs->skip_txfm_cost[skip_ctx][1]
1378                          : rd_stats_y->rate;
1379   *rd_y = RDCOST(x->rdmult, rate_y + *mode_cost_y, rd_stats_y->dist);
1380   if (best_rd < (INT64_MAX / 2) && *rd_y > (best_rd + (best_rd >> 2))) {
1381     intra_search_state->skip_intra_modes = 1;
1382     return 0;
1383   }
1384 
1385   return 1;
1386 }
1387 
// Selects the chroma intra mode for an intra block in an inter frame and
// copies its statistics into rd_stats_uv.
//
// When intra_search_state holds no chroma result yet (rate_uv_intra ==
// INT_MAX), av1_rd_pick_intra_sbuv_mode() is run and its outcome (mode,
// palette info, angle delta, rate, distortion, skip flag) is cached in
// intra_search_state. If the rd cost of that result exceeds best_rd,
// skip_intra_modes is set to 1 and 0 is returned. Otherwise the cached result
// is written to rd_stats_uv and mbmi, rd_stats->skip_txfm is set to the AND
// of the luma and chroma skip flags, and 1 is returned.
int av1_search_intra_uv_modes_in_interframe(
    IntraModeSearchState *intra_search_state, const AV1_COMP *cpi,
    MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats,
    const RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int64_t best_rd) {
  IntraModeSearchState *const state = intra_search_state;
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(mbmi->ref_frame[0] == INTRA_FRAME);

  // TODO(chiyotsai@google.com): Consolidate the chroma search code here with
  // the one in av1_search_palette_mode.
  PALETTE_MODE_INFO *const palette_info = &mbmi->palette_mode_info;
  const int palette_allowed =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(cm->features.allow_screen_content_tools, mbmi->bsize);

  assert(state->rate_uv_intra == INT_MAX);
  if (state->rate_uv_intra == INT_MAX) {
    // Nothing cached yet: run the chroma search and remember its outcome.
    const TX_SIZE chroma_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
    av1_rd_pick_intra_sbuv_mode(cpi, x, &state->rate_uv_intra,
                                &state->rate_uv_tokenonly, &state->dist_uvs,
                                &state->skip_uvs, bsize, chroma_tx_size);
    state->mode_uv = mbmi->uv_mode;
    if (palette_allowed) state->pmi_uv = *palette_info;
    state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV];

    const int chroma_rate = state->rate_uv_tokenonly;
    const int64_t chroma_dist = state->dist_uvs;
    const int64_t chroma_rd = RDCOST(x->rdmult, chroma_rate, chroma_dist);
    if (chroma_rd > best_rd) {
      // The best chroma mode alone already costs more than best_rd, so no
      // intra mode can win for this block.
      state->skip_intra_modes = 1;
      return 0;
    }
  }

  // A usable chroma predictor is cached at this point; copy its statistics.
  // TODO(any): the stats here is not right if the best uv mode is CFL but the
  // best y mode is palette.
  rd_stats_uv->rate = state->rate_uv_tokenonly;
  rd_stats_uv->dist = state->dist_uvs;
  rd_stats_uv->skip_txfm = state->skip_uvs;
  rd_stats->skip_txfm = rd_stats_y->skip_txfm && rd_stats_uv->skip_txfm;

  mbmi->uv_mode = state->mode_uv;
  if (palette_allowed) {
    // Only the chroma palette is restored: its size and the colour entries
    // from offset PALETTE_MAX_SIZE onwards (2 * PALETTE_MAX_SIZE of them).
    palette_info->palette_size[1] = state->pmi_uv.palette_size[1];
    memcpy(&palette_info->palette_colors[PALETTE_MAX_SIZE],
           &state->pmi_uv.palette_colors[PALETTE_MAX_SIZE],
           2 * PALETTE_MAX_SIZE * sizeof(palette_info->palette_colors[0]));
  }
  mbmi->angle_delta[PLANE_TYPE_UV] = state->uv_angle_delta;

  return 1;
}
1446 
1447 // Checks if odd delta angles can be pruned based on rdcosts of even delta
1448 // angles of the corresponding directional mode.
prune_luma_odd_delta_angles_using_rd_cost(const MB_MODE_INFO * const mbmi,const int64_t * const intra_modes_rd_cost,int64_t best_rd,int prune_luma_odd_delta_angles_in_intra)1449 static inline int prune_luma_odd_delta_angles_using_rd_cost(
1450     const MB_MODE_INFO *const mbmi, const int64_t *const intra_modes_rd_cost,
1451     int64_t best_rd, int prune_luma_odd_delta_angles_in_intra) {
1452   const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
1453   if (!prune_luma_odd_delta_angles_in_intra ||
1454       !av1_is_directional_mode(mbmi->mode) || !(abs(luma_delta_angle) & 1) ||
1455       best_rd == INT64_MAX)
1456     return 0;
1457 
1458   const int64_t rd_thresh = best_rd + (best_rd >> 3);
1459 
1460   // Neighbour rdcosts are considered for pruning of odd delta angles as
1461   // mentioned below:
1462   // Delta angle      Delta angle rdcost
1463   // to be pruned     to be considered
1464   //    -3                   -2
1465   //    -1                -2, 0
1466   //     1                 0, 2
1467   //     3                    2
1468   return intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA] > rd_thresh &&
1469          intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA + 2] >
1470              rd_thresh;
1471 }
1472 
1473 // Finds the best non-intrabc mode on an intra frame.
av1_rd_pick_intra_sby_mode(const AV1_COMP * const cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,BLOCK_SIZE bsize,int64_t best_rd,PICK_MODE_CONTEXT * ctx)1474 int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
1475                                    int *rate, int *rate_tokenonly,
1476                                    int64_t *distortion, uint8_t *skippable,
1477                                    BLOCK_SIZE bsize, int64_t best_rd,
1478                                    PICK_MODE_CONTEXT *ctx) {
1479   MACROBLOCKD *const xd = &x->e_mbd;
1480   MB_MODE_INFO *const mbmi = xd->mi[0];
1481   assert(!is_inter_block(mbmi));
1482   int64_t best_model_rd = INT64_MAX;
1483   int is_directional_mode;
1484   uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 };
1485   // Flag to check rd of any intra mode is better than best_rd passed to this
1486   // function
1487   int beat_best_rd = 0;
1488   const int *bmode_costs;
1489   const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg;
1490   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
1491   const int try_palette =
1492       cpi->oxcf.tool_cfg.enable_palette &&
1493       av1_allow_palette(cpi->common.features.allow_screen_content_tools,
1494                         mbmi->bsize);
1495   uint8_t *best_palette_color_map =
1496       try_palette ? x->palette_buffer->best_palette_color_map : NULL;
1497   const MB_MODE_INFO *above_mi = xd->above_mbmi;
1498   const MB_MODE_INFO *left_mi = xd->left_mbmi;
1499   const PREDICTION_MODE A = av1_above_block_mode(above_mi);
1500   const PREDICTION_MODE L = av1_left_block_mode(left_mi);
1501   const int above_ctx = intra_mode_context[A];
1502   const int left_ctx = intra_mode_context[L];
1503   bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
1504 
1505   mbmi->angle_delta[PLANE_TYPE_Y] = 0;
1506   const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf;
1507   if (intra_sf->intra_pruning_with_hog) {
1508     // Less aggressive thresholds are used here than those used in inter frame
1509     // encoding in av1_handle_intra_y_mode() because we want key frames/intra
1510     // frames to have higher quality.
1511     const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f };
1512     const int is_chroma = 0;
1513     prune_intra_mode_with_hog(x, bsize, cpi->common.seq_params->sb_size,
1514                               thresh[intra_sf->intra_pruning_with_hog - 1],
1515                               directional_mode_skip_mask, is_chroma);
1516   }
1517   mbmi->filter_intra_mode_info.use_filter_intra = 0;
1518   pmi->palette_size[0] = 0;
1519 
1520   // Set params for mode evaluation
1521   set_mode_eval_params(cpi, x, MODE_EVAL);
1522 
1523   MB_MODE_INFO best_mbmi = *mbmi;
1524   const int max_winner_mode_count =
1525       winner_mode_count_allowed[cpi->sf.winner_mode_sf.multi_winner_mode_type];
1526   zero_winner_mode_stats(bsize, max_winner_mode_count, x->winner_mode_stats);
1527   x->winner_mode_count = 0;
1528 
1529   // Searches the intra-modes except for intrabc, palette, and filter_intra.
1530   int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
1531   for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
1532     top_intra_model_rd[i] = INT64_MAX;
1533   }
1534 
1535   // Initialize the rdcost corresponding to all the directional and
1536   // non-directional intra modes.
1537   // 1. For directional modes, it stores the rdcost values for delta angles -4,
1538   // -3, ..., 3, 4.
1539   // 2. The rdcost value for luma_delta_angle is stored at index
1540   // luma_delta_angle + MAX_ANGLE_DELTA + 1.
1541   // 3. The rdcost values for fictitious/nonexistent luma_delta_angle -4 and 4
1542   // (array indices 0 and 8) are always set to INT64_MAX (the initial value).
1543   int64_t intra_modes_rd_cost[INTRA_MODE_END]
1544                              [SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY];
1545   for (int i = 0; i < INTRA_MODE_END; i++) {
1546     for (int j = 0; j < SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY; j++) {
1547       intra_modes_rd_cost[i][j] = INT64_MAX;
1548     }
1549   }
1550 
1551   for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
1552        ++mode_idx) {
1553     set_y_mode_and_delta_angle(mode_idx, mbmi,
1554                                intra_sf->prune_luma_odd_delta_angles_in_intra);
1555     RD_STATS this_rd_stats;
1556     int this_rate, this_rate_tokenonly, s;
1557     int is_diagonal_mode;
1558     int64_t this_distortion, this_rd;
1559     const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
1560 
1561     is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode);
1562     if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue;
1563     if (av1_is_directional_mode(mbmi->mode) &&
1564         !intra_mode_cfg->enable_directional_intra)
1565       continue;
1566 
1567     // The smooth prediction mode appears to be more frequently picked
1568     // than horizontal / vertical smooth prediction modes. Hence treat
1569     // them differently in speed features.
1570     if ((!intra_mode_cfg->enable_smooth_intra ||
1571          intra_sf->disable_smooth_intra) &&
1572         (mbmi->mode == SMOOTH_H_PRED || mbmi->mode == SMOOTH_V_PRED))
1573       continue;
1574     if (!intra_mode_cfg->enable_smooth_intra && mbmi->mode == SMOOTH_PRED)
1575       continue;
1576 
1577     // The functionality of filter intra modes and smooth prediction
1578     // overlap. Hence smooth prediction is pruned only if all the
1579     // filter intra modes are enabled.
1580     if (intra_sf->disable_smooth_intra &&
1581         intra_sf->prune_filter_intra_level == 0 && mbmi->mode == SMOOTH_PRED)
1582       continue;
1583     if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED)
1584       continue;
1585 
1586     // Skip the evaluation of modes that do not match with the winner mode in
1587     // x->mb_mode_cache.
1588     if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue;
1589 
1590     is_directional_mode = av1_is_directional_mode(mbmi->mode);
1591     if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
1592     if (is_directional_mode &&
1593         !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
1594         luma_delta_angle != 0)
1595       continue;
1596 
1597     // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
1598     if (!(intra_sf->intra_y_mode_mask[max_txsize_lookup[bsize]] &
1599           (1 << mbmi->mode)))
1600       continue;
1601 
1602     if (prune_luma_odd_delta_angles_using_rd_cost(
1603             mbmi, intra_modes_rd_cost[mbmi->mode], best_rd,
1604             intra_sf->prune_luma_odd_delta_angles_in_intra))
1605       continue;
1606 
1607     const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
1608     const int64_t this_model_rd =
1609         intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
1610 
1611     const int model_rd_index_for_pruning =
1612         get_model_rd_index_for_pruning(x, intra_sf);
1613 
1614     if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd,
1615                            intra_sf->top_intra_model_count_allowed,
1616                            model_rd_index_for_pruning))
1617       continue;
1618 
1619     // Builds the actual prediction. The prediction from
1620     // model_intra_yrd_and_prune was just an estimation that did not take into
1621     // account the effect of txfm pipeline, so we need to redo it for real
1622     // here.
1623     av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
1624     this_rate_tokenonly = this_rd_stats.rate;
1625     this_distortion = this_rd_stats.dist;
1626     s = this_rd_stats.skip_txfm;
1627 
1628     if (this_rate_tokenonly == INT_MAX) continue;
1629 
1630     if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
1631       // av1_pick_uniform_tx_size_type_yrd above includes the cost of the
1632       // tx_size in the tokenonly rate, but for intra blocks, tx_size is always
1633       // coded (prediction granularity), so we account for it in the full rate,
1634       // not the tokenonly rate.
1635       this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size);
1636     }
1637     this_rate =
1638         this_rd_stats.rate +
1639         intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode], 0);
1640     this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
1641 
1642     // Visual quality adjustment based on recon vs source variance.
1643     if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) {
1644       this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
1645     }
1646 
1647     intra_modes_rd_cost[mbmi->mode][luma_delta_angle + MAX_ANGLE_DELTA + 1] =
1648         this_rd;
1649 
1650     // Collect mode stats for multiwinner mode processing
1651     const int txfm_search_done = 1;
1652     store_winner_mode_stats(
1653         &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
1654         cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
1655     if (this_rd < best_rd) {
1656       best_mbmi = *mbmi;
1657       best_rd = this_rd;
1658       // Setting beat_best_rd flag because current mode rd is better than
1659       // best_rd passed to this function
1660       beat_best_rd = 1;
1661       *rate = this_rate;
1662       *rate_tokenonly = this_rate_tokenonly;
1663       *distortion = this_distortion;
1664       *skippable = s;
1665       memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip,
1666              sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
1667       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
1668     }
1669   }
1670 
1671   // Searches palette
1672   if (try_palette) {
1673     av1_rd_pick_palette_intra_sby(
1674         cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
1675         &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd,
1676         ctx, ctx->blk_skip, ctx->tx_type_map);
1677   }
1678 
1679   // Searches filter_intra
1680   if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
1681     if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
1682                                  skippable, bsize, bmode_costs[DC_PRED],
1683                                  best_mbmi.mode, &best_rd, &best_model_rd,
1684                                  ctx)) {
1685       best_mbmi = *mbmi;
1686     }
1687   }
1688 
1689   // No mode is identified with less rd value than best_rd passed to this
1690   // function. In such cases winner mode processing is not necessary and return
1691   // best_rd as INT64_MAX to indicate best mode is not identified
1692   if (!beat_best_rd) return INT64_MAX;
1693 
1694   // In multi-winner mode processing, perform tx search for few best modes
1695   // identified during mode evaluation. Winner mode processing uses best tx
1696   // configuration for tx search.
1697   if (cpi->sf.winner_mode_sf.multi_winner_mode_type) {
1698     int best_mode_idx = 0;
1699     int block_width, block_height;
1700     uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map;
1701     av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
1702                              &block_height, NULL, NULL);
1703 
1704     for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
1705       *mbmi = x->winner_mode_stats[mode_idx].mbmi;
1706       if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
1707         // Restore color_map of palette mode before winner mode processing
1708         if (mbmi->palette_mode_info.palette_size[0] > 0) {
1709           uint8_t *color_map_src =
1710               x->winner_mode_stats[mode_idx].color_index_map;
1711           memcpy(color_map_dst, color_map_src,
1712                  block_width * block_height * sizeof(*color_map_src));
1713         }
1714         // Set params for winner mode evaluation
1715         set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1716 
1717         // Winner mode processing
1718         // If previous searches use only the default tx type/no R-D optimization
1719         // of quantized coeffs, do an extra search for the best tx type/better
1720         // R-D optimization of quantized coeffs
1721         if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1722                             rate_tokenonly, distortion, skippable, &best_mbmi,
1723                             ctx))
1724           best_mode_idx = mode_idx;
1725       }
1726     }
1727     // Copy color_map of palette mode for final winner mode
1728     if (best_mbmi.palette_mode_info.palette_size[0] > 0) {
1729       uint8_t *color_map_src =
1730           x->winner_mode_stats[best_mode_idx].color_index_map;
1731       memcpy(color_map_dst, color_map_src,
1732              block_width * block_height * sizeof(*color_map_src));
1733     }
1734   } else {
1735     // If previous searches use only the default tx type/no R-D optimization of
1736     // quantized coeffs, do an extra search for the best tx type/better R-D
1737     // optimization of quantized coeffs
1738     if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
1739       // Set params for winner mode evaluation
1740       set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
1741       *mbmi = best_mbmi;
1742       intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
1743                       rate_tokenonly, distortion, skippable, &best_mbmi, ctx);
1744     }
1745   }
1746   *mbmi = best_mbmi;
1747   av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
1748   return best_rd;
1749 }
1750