/*
 * Copyright (c) 2020, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
/*!\file
 * \brief Defines utility functions used in intra mode search.
 *
 * This includes rdcost estimations, histogram based pruning, etc.
 */
17 #ifndef AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
18 #define AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
19 
20 #include "av1/common/enums.h"
21 #include "av1/common/pred_common.h"
22 #include "av1/common/reconintra.h"
23 
24 #include "av1/encoder/encoder.h"
25 #include "av1/encoder/encodeframe.h"
26 #include "av1/encoder/model_rd.h"
27 #include "av1/encoder/palette.h"
28 #include "av1/encoder/hybrid_fwd_txfm.h"
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 /*!\cond */
35 #define BINS 32
36 static const float av1_intra_hog_model_bias[DIRECTIONAL_MODES] = {
37   0.450578f,  0.695518f,  -0.717944f, -0.639894f,
38   -0.602019f, -0.453454f, 0.055857f,  -0.465480f,
39 };
40 
41 static const float av1_intra_hog_model_weights[BINS * DIRECTIONAL_MODES] = {
42   -3.076402f, -3.757063f, -3.275266f, -3.180665f, -3.452105f, -3.216593f,
43   -2.871212f, -3.134296f, -1.822324f, -2.401411f, -1.541016f, -1.195322f,
44   -0.434156f, 0.322868f,  2.260546f,  3.368715f,  3.989290f,  3.308487f,
45   2.277893f,  0.923793f,  0.026412f,  -0.385174f, -0.718622f, -1.408867f,
46   -1.050558f, -2.323941f, -2.225827f, -2.585453f, -3.054283f, -2.875087f,
47   -2.985709f, -3.447155f, 3.758139f,  3.204353f,  2.170998f,  0.826587f,
48   -0.269665f, -0.702068f, -1.085776f, -2.175249f, -1.623180f, -2.975142f,
49   -2.779629f, -3.190799f, -3.521900f, -3.375480f, -3.319355f, -3.897389f,
50   -3.172334f, -3.594528f, -2.879132f, -2.547777f, -2.921023f, -2.281844f,
51   -1.818988f, -2.041771f, -0.618268f, -1.396458f, -0.567153f, -0.285868f,
52   -0.088058f, 0.753494f,  2.092413f,  3.215266f,  -3.300277f, -2.748658f,
53   -2.315784f, -2.423671f, -2.257283f, -2.269583f, -2.196660f, -2.301076f,
54   -2.646516f, -2.271319f, -2.254366f, -2.300102f, -2.217960f, -2.473300f,
55   -2.116866f, -2.528246f, -3.314712f, -1.701010f, -0.589040f, -0.088077f,
56   0.813112f,  1.702213f,  2.653045f,  3.351749f,  3.243554f,  3.199409f,
57   2.437856f,  1.468854f,  0.533039f,  -0.099065f, -0.622643f, -2.200732f,
58   -4.228861f, -2.875263f, -1.273956f, -0.433280f, 0.803771f,  1.975043f,
59   3.179528f,  3.939064f,  3.454379f,  3.689386f,  3.116411f,  1.970991f,
60   0.798406f,  -0.628514f, -1.252546f, -2.825176f, -4.090178f, -3.777448f,
61   -3.227314f, -3.479403f, -3.320569f, -3.159372f, -2.729202f, -2.722341f,
62   -3.054913f, -2.742923f, -2.612703f, -2.662632f, -2.907314f, -3.117794f,
63   -3.102660f, -3.970972f, -4.891357f, -3.935582f, -3.347758f, -2.721924f,
64   -2.219011f, -1.702391f, -0.866529f, -0.153743f, 0.107733f,  1.416882f,
65   2.572884f,  3.607755f,  3.974820f,  3.997783f,  2.970459f,  0.791687f,
66   -1.478921f, -1.228154f, -1.216955f, -1.765932f, -1.951003f, -1.985301f,
67   -1.975881f, -1.985593f, -2.422371f, -2.419978f, -2.531288f, -2.951853f,
68   -3.071380f, -3.277027f, -3.373539f, -4.462010f, -0.967888f, 0.805524f,
69   2.794130f,  3.685984f,  3.745195f,  3.252444f,  2.316108f,  1.399146f,
70   -0.136519f, -0.162811f, -1.004357f, -1.667911f, -1.964662f, -2.937579f,
71   -3.019533f, -3.942766f, -5.102767f, -3.882073f, -3.532027f, -3.451956f,
72   -2.944015f, -2.643064f, -2.529872f, -2.077290f, -2.809965f, -1.803734f,
73   -1.783593f, -1.662585f, -1.415484f, -1.392673f, -0.788794f, -1.204819f,
74   -1.998864f, -1.182102f, -0.892110f, -1.317415f, -1.359112f, -1.522867f,
75   -1.468552f, -1.779072f, -2.332959f, -2.160346f, -2.329387f, -2.631259f,
76   -2.744936f, -3.052494f, -2.787363f, -3.442548f, -4.245075f, -3.032172f,
77   -2.061609f, -1.768116f, -1.286072f, -0.706587f, -0.192413f, 0.386938f,
78   0.716997f,  1.481393f,  2.216702f,  2.737986f,  3.109809f,  3.226084f,
79   2.490098f,  -0.095827f, -3.864816f, -3.507248f, -3.128925f, -2.908251f,
80   -2.883836f, -2.881411f, -2.524377f, -2.624478f, -2.399573f, -2.367718f,
81   -1.918255f, -1.926277f, -1.694584f, -1.723790f, -0.966491f, -1.183115f,
82   -1.430687f, 0.872896f,  2.766550f,  3.610080f,  3.578041f,  3.334928f,
83   2.586680f,  1.895721f,  1.122195f,  0.488519f,  -0.140689f, -0.799076f,
84   -1.222860f, -1.502437f, -1.900969f, -3.206816f,
85 };
86 
87 static const NN_CONFIG av1_intra_hog_model_nnconfig = {
88   BINS,               // num_inputs
89   DIRECTIONAL_MODES,  // num_outputs
90   0,                  // num_hidden_layers
91   { 0 },
92   {
93       av1_intra_hog_model_weights,
94   },
95   {
96       av1_intra_hog_model_bias,
97   },
98 };
99 
100 #define FIX_PREC_BITS (16)
get_hist_bin_idx(int dx,int dy)101 static AOM_INLINE int get_hist_bin_idx(int dx, int dy) {
102   const int32_t ratio = (dy * (1 << FIX_PREC_BITS)) / dx;
103 
104   // Find index by bisection
105   static const int thresholds[BINS] = {
106     -1334015, -441798, -261605, -183158, -138560, -109331, -88359, -72303,
107     -59392,   -48579,  -39272,  -30982,  -23445,  -16400,  -9715,  -3194,
108     3227,     9748,    16433,   23478,   31015,   39305,   48611,  59425,
109     72336,    88392,   109364,  138593,  183191,  261638,  441831, INT32_MAX
110   };
111 
112   int lo_idx = 0, hi_idx = BINS - 1;
113   // Divide into segments of size 8 gives better performance than binary search
114   // here.
115   if (ratio <= thresholds[7]) {
116     lo_idx = 0;
117     hi_idx = 7;
118   } else if (ratio <= thresholds[15]) {
119     lo_idx = 8;
120     hi_idx = 15;
121   } else if (ratio <= thresholds[23]) {
122     lo_idx = 16;
123     hi_idx = 23;
124   } else {
125     lo_idx = 24;
126     hi_idx = 31;
127   }
128 
129   for (int idx = lo_idx; idx <= hi_idx; idx++) {
130     if (ratio <= thresholds[idx]) {
131       return idx;
132     }
133   }
134   assert(0 && "No valid histogram bin found!");
135   return BINS - 1;
136 }
137 #undef FIX_PREC_BITS
138 
139 // Normalizes the hog data.
normalize_hog(float total,float * hist)140 static AOM_INLINE void normalize_hog(float total, float *hist) {
141   for (int i = 0; i < BINS; ++i) hist[i] /= total;
142 }
143 
lowbd_generate_hog(const uint8_t * src,int stride,int rows,int cols,float * hist)144 static AOM_INLINE void lowbd_generate_hog(const uint8_t *src, int stride,
145                                           int rows, int cols, float *hist) {
146   float total = 0.1f;
147   src += stride;
148   for (int r = 1; r < rows - 1; ++r) {
149     for (int c = 1; c < cols - 1; ++c) {
150       const uint8_t *above = &src[c - stride];
151       const uint8_t *below = &src[c + stride];
152       const uint8_t *left = &src[c - 1];
153       const uint8_t *right = &src[c + 1];
154       // Calculate gradient using Sobel filters.
155       const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
156                      (left[-stride] + 2 * left[0] + left[stride]);
157       const int dy = (below[-1] + 2 * below[0] + below[1]) -
158                      (above[-1] + 2 * above[0] + above[1]);
159       if (dx == 0 && dy == 0) continue;
160       const int temp = abs(dx) + abs(dy);
161       if (!temp) continue;
162       total += temp;
163       if (dx == 0) {
164         hist[0] += temp / 2;
165         hist[BINS - 1] += temp / 2;
166       } else {
167         const int idx = get_hist_bin_idx(dx, dy);
168         assert(idx >= 0 && idx < BINS);
169         hist[idx] += temp;
170       }
171     }
172     src += stride;
173   }
174 
175   normalize_hog(total, hist);
176 }
177 
178 // Computes and stores pixel level gradient information of a given superblock
179 // for LBD encode.
lowbd_compute_gradient_info_sb(MACROBLOCK * const x,BLOCK_SIZE sb_size,PLANE_TYPE plane)180 static AOM_INLINE void lowbd_compute_gradient_info_sb(MACROBLOCK *const x,
181                                                       BLOCK_SIZE sb_size,
182                                                       PLANE_TYPE plane) {
183   PixelLevelGradientInfo *const grad_info_sb =
184       x->pixel_gradient_info + plane * MAX_SB_SQUARE;
185   const uint8_t *src = x->plane[plane].src.buf;
186   const int stride = x->plane[plane].src.stride;
187   const int ss_x = x->e_mbd.plane[plane].subsampling_x;
188   const int ss_y = x->e_mbd.plane[plane].subsampling_y;
189   const int sb_height = block_size_high[sb_size] >> ss_y;
190   const int sb_width = block_size_wide[sb_size] >> ss_x;
191   src += stride;
192   for (int r = 1; r < sb_height - 1; ++r) {
193     for (int c = 1; c < sb_width - 1; ++c) {
194       const uint8_t *above = &src[c - stride];
195       const uint8_t *below = &src[c + stride];
196       const uint8_t *left = &src[c - 1];
197       const uint8_t *right = &src[c + 1];
198       // Calculate gradient using Sobel filters.
199       const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
200                      (left[-stride] + 2 * left[0] + left[stride]);
201       const int dy = (below[-1] + 2 * below[0] + below[1]) -
202                      (above[-1] + 2 * above[0] + above[1]);
203       grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
204       grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
205           (uint16_t)(abs(dx) + abs(dy));
206       grad_info_sb[r * sb_width + c].hist_bin_idx =
207           (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
208     }
209     src += stride;
210   }
211 }
212 
213 #if CONFIG_AV1_HIGHBITDEPTH
highbd_generate_hog(const uint8_t * src8,int stride,int rows,int cols,float * hist)214 static AOM_INLINE void highbd_generate_hog(const uint8_t *src8, int stride,
215                                            int rows, int cols, float *hist) {
216   float total = 0.1f;
217   const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
218   src += stride;
219   for (int r = 1; r < rows - 1; ++r) {
220     for (int c = 1; c < cols - 1; ++c) {
221       const uint16_t *above = &src[c - stride];
222       const uint16_t *below = &src[c + stride];
223       const uint16_t *left = &src[c - 1];
224       const uint16_t *right = &src[c + 1];
225       // Calculate gradient using Sobel filters.
226       const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
227                      (left[-stride] + 2 * left[0] + left[stride]);
228       const int dy = (below[-1] + 2 * below[0] + below[1]) -
229                      (above[-1] + 2 * above[0] + above[1]);
230       if (dx == 0 && dy == 0) continue;
231       const int temp = abs(dx) + abs(dy);
232       if (!temp) continue;
233       total += temp;
234       if (dx == 0) {
235         hist[0] += temp / 2;
236         hist[BINS - 1] += temp / 2;
237       } else {
238         const int idx = get_hist_bin_idx(dx, dy);
239         assert(idx >= 0 && idx < BINS);
240         hist[idx] += temp;
241       }
242     }
243     src += stride;
244   }
245 
246   normalize_hog(total, hist);
247 }
248 
249 // Computes and stores pixel level gradient information of a given superblock
250 // for HBD encode.
highbd_compute_gradient_info_sb(MACROBLOCK * const x,BLOCK_SIZE sb_size,PLANE_TYPE plane)251 static AOM_INLINE void highbd_compute_gradient_info_sb(MACROBLOCK *const x,
252                                                        BLOCK_SIZE sb_size,
253                                                        PLANE_TYPE plane) {
254   PixelLevelGradientInfo *const grad_info_sb =
255       x->pixel_gradient_info + plane * MAX_SB_SQUARE;
256   const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[plane].src.buf);
257   const int stride = x->plane[plane].src.stride;
258   const int ss_x = x->e_mbd.plane[plane].subsampling_x;
259   const int ss_y = x->e_mbd.plane[plane].subsampling_y;
260   const int sb_height = block_size_high[sb_size] >> ss_y;
261   const int sb_width = block_size_wide[sb_size] >> ss_x;
262   src += stride;
263   for (int r = 1; r < sb_height - 1; ++r) {
264     for (int c = 1; c < sb_width - 1; ++c) {
265       const uint16_t *above = &src[c - stride];
266       const uint16_t *below = &src[c + stride];
267       const uint16_t *left = &src[c - 1];
268       const uint16_t *right = &src[c + 1];
269       // Calculate gradient using Sobel filters.
270       const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
271                      (left[-stride] + 2 * left[0] + left[stride]);
272       const int dy = (below[-1] + 2 * below[0] + below[1]) -
273                      (above[-1] + 2 * above[0] + above[1]);
274       grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
275       grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
276           (uint16_t)(abs(dx) + abs(dy));
277       grad_info_sb[r * sb_width + c].hist_bin_idx =
278           (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
279     }
280     src += stride;
281   }
282 }
283 #endif  // CONFIG_AV1_HIGHBITDEPTH
284 
generate_hog(const uint8_t * src8,int stride,int rows,int cols,float * hist,int highbd)285 static AOM_INLINE void generate_hog(const uint8_t *src8, int stride, int rows,
286                                     int cols, float *hist, int highbd) {
287 #if CONFIG_AV1_HIGHBITDEPTH
288   if (highbd) {
289     highbd_generate_hog(src8, stride, rows, cols, hist);
290     return;
291   }
292 #else
293   (void)highbd;
294 #endif  // CONFIG_AV1_HIGHBITDEPTH
295   lowbd_generate_hog(src8, stride, rows, cols, hist);
296 }
297 
compute_gradient_info_sb(MACROBLOCK * const x,BLOCK_SIZE sb_size,PLANE_TYPE plane)298 static AOM_INLINE void compute_gradient_info_sb(MACROBLOCK *const x,
299                                                 BLOCK_SIZE sb_size,
300                                                 PLANE_TYPE plane) {
301 #if CONFIG_AV1_HIGHBITDEPTH
302   if (is_cur_buf_hbd(&x->e_mbd)) {
303     highbd_compute_gradient_info_sb(x, sb_size, plane);
304     return;
305   }
306 #endif  // CONFIG_AV1_HIGHBITDEPTH
307   lowbd_compute_gradient_info_sb(x, sb_size, plane);
308 }
309 
310 // Function to generate pixel level gradient information for a given superblock.
311 // Sets the flags 'is_sb_gradient_cached' for the specific plane-type if
312 // gradient info is generated for the same.
produce_gradients_for_sb(AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE sb_size,int mi_row,int mi_col)313 static AOM_INLINE void produce_gradients_for_sb(AV1_COMP *cpi, MACROBLOCK *x,
314                                                 BLOCK_SIZE sb_size, int mi_row,
315                                                 int mi_col) {
316   const SPEED_FEATURES *sf = &cpi->sf;
317   // Initialise flags related to hog data caching.
318   x->is_sb_gradient_cached[PLANE_TYPE_Y] = false;
319   x->is_sb_gradient_cached[PLANE_TYPE_UV] = false;
320 
321   // SB level caching of gradient data may not help in speedup for the following
322   // cases:
323   // (1) Inter frames (due to early intra gating)
324   // (2) When partition_search_type is not SEARCH_PARTITION
325   // Hence, gradient data is computed at block level in such cases.
326 
327   if (!frame_is_intra_only(&cpi->common) ||
328       sf->part_sf.partition_search_type != SEARCH_PARTITION)
329     return;
330 
331   const int num_planes = av1_num_planes(&cpi->common);
332 
333   av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
334 
335   if (sf->intra_sf.intra_pruning_with_hog) {
336     compute_gradient_info_sb(x, sb_size, PLANE_TYPE_Y);
337     x->is_sb_gradient_cached[PLANE_TYPE_Y] = true;
338   }
339   if (sf->intra_sf.chroma_intra_pruning_with_hog && num_planes > 1) {
340     compute_gradient_info_sb(x, sb_size, PLANE_TYPE_UV);
341     x->is_sb_gradient_cached[PLANE_TYPE_UV] = true;
342   }
343 }
344 
345 // Reuses the pixel level gradient data generated at superblock level for block
346 // level histogram computation.
generate_hog_using_gradient_cache(const MACROBLOCK * x,int rows,int cols,BLOCK_SIZE sb_size,PLANE_TYPE plane,float * hist)347 static AOM_INLINE void generate_hog_using_gradient_cache(const MACROBLOCK *x,
348                                                          int rows, int cols,
349                                                          BLOCK_SIZE sb_size,
350                                                          PLANE_TYPE plane,
351                                                          float *hist) {
352   float total = 0.1f;
353   const int ss_x = x->e_mbd.plane[plane].subsampling_x;
354   const int ss_y = x->e_mbd.plane[plane].subsampling_y;
355   const int sb_width = block_size_wide[sb_size] >> ss_x;
356 
357   // Derive the offset from the starting of the superblock in order to locate
358   // the block level gradient data in the cache.
359   const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1);
360   const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1);
361   const int block_offset_in_grad_cache =
362       sb_width * (mi_row_in_sb << (MI_SIZE_LOG2 - ss_y)) +
363       (mi_col_in_sb << (MI_SIZE_LOG2 - ss_x));
364   const PixelLevelGradientInfo *grad_info_blk = x->pixel_gradient_info +
365                                                 plane * MAX_SB_SQUARE +
366                                                 block_offset_in_grad_cache;
367 
368   // Retrieve the cached gradient information and generate the histogram.
369   for (int r = 1; r < rows - 1; ++r) {
370     for (int c = 1; c < cols - 1; ++c) {
371       const uint16_t abs_dx_abs_dy_sum =
372           grad_info_blk[r * sb_width + c].abs_dx_abs_dy_sum;
373       if (!abs_dx_abs_dy_sum) continue;
374       total += abs_dx_abs_dy_sum;
375       const bool is_dx_zero = grad_info_blk[r * sb_width + c].is_dx_zero;
376       if (is_dx_zero) {
377         hist[0] += abs_dx_abs_dy_sum >> 1;
378         hist[BINS - 1] += abs_dx_abs_dy_sum >> 1;
379       } else {
380         const int8_t idx = grad_info_blk[r * sb_width + c].hist_bin_idx;
381         assert(idx >= 0 && idx < BINS);
382         hist[idx] += abs_dx_abs_dy_sum;
383       }
384     }
385   }
386   normalize_hog(total, hist);
387 }
388 
collect_hog_data(const MACROBLOCK * x,BLOCK_SIZE bsize,BLOCK_SIZE sb_size,int plane,float * hog)389 static INLINE void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
390                                     BLOCK_SIZE sb_size, int plane, float *hog) {
391   const MACROBLOCKD *xd = &x->e_mbd;
392   const struct macroblockd_plane *const pd = &xd->plane[plane];
393   const int ss_x = pd->subsampling_x;
394   const int ss_y = pd->subsampling_y;
395   const int bh = block_size_high[bsize];
396   const int bw = block_size_wide[bsize];
397   const int rows =
398       ((xd->mb_to_bottom_edge >= 0) ? bh : (xd->mb_to_bottom_edge >> 3) + bh) >>
399       ss_y;
400   const int cols =
401       ((xd->mb_to_right_edge >= 0) ? bw : (xd->mb_to_right_edge >> 3) + bw) >>
402       ss_x;
403 
404   // If gradient data is already generated at SB level, reuse the cached data.
405   // Otherwise, compute the data.
406   if (x->is_sb_gradient_cached[plane]) {
407     generate_hog_using_gradient_cache(x, rows, cols, sb_size, plane, hog);
408   } else {
409     const uint8_t *src = x->plane[plane].src.buf;
410     const int src_stride = x->plane[plane].src.stride;
411     generate_hog(src, src_stride, rows, cols, hog, is_cur_buf_hbd(xd));
412   }
413 
414   // Scale the hog so the luma and chroma are on the same scale
415   for (int b = 0; b < BINS; ++b) {
416     hog[b] *= (1 + ss_x) * (1 + ss_y);
417   }
418 }
419 
prune_intra_mode_with_hog(const MACROBLOCK * x,BLOCK_SIZE bsize,BLOCK_SIZE sb_size,float th,uint8_t * directional_mode_skip_mask,int is_chroma)420 static AOM_INLINE void prune_intra_mode_with_hog(
421     const MACROBLOCK *x, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, float th,
422     uint8_t *directional_mode_skip_mask, int is_chroma) {
423   const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y;
424   float hist[BINS] = { 0.0f };
425   collect_hog_data(x, bsize, sb_size, plane, hist);
426 
427   // Make prediction for each of the mode
428   float scores[DIRECTIONAL_MODES] = { 0.0f };
429   av1_nn_predict(hist, &av1_intra_hog_model_nnconfig, 1, scores);
430   for (UV_PREDICTION_MODE uv_mode = UV_V_PRED; uv_mode <= UV_D67_PRED;
431        uv_mode++) {
432     if (scores[uv_mode - UV_V_PRED] <= th) {
433       directional_mode_skip_mask[uv_mode] = 1;
434     }
435   }
436 }
437 #undef BINS
438 
439 // Returns the cost needed to send a uniformly distributed r.v.
write_uniform_cost(int n,int v)440 static AOM_INLINE int write_uniform_cost(int n, int v) {
441   const int l = get_unsigned_bits(n);
442   const int m = (1 << l) - n;
443   if (l == 0) return 0;
444   if (v < m)
445     return av1_cost_literal(l - 1);
446   else
447     return av1_cost_literal(l);
448 }
449 /*!\endcond */
450 
451 /*!\brief Returns the rate cost for luma prediction mode info of intra blocks.
452  *
453  * \callergraph
454  */
intra_mode_info_cost_y(const AV1_COMP * cpi,const MACROBLOCK * x,const MB_MODE_INFO * mbmi,BLOCK_SIZE bsize,int mode_cost)455 static AOM_INLINE int intra_mode_info_cost_y(const AV1_COMP *cpi,
456                                              const MACROBLOCK *x,
457                                              const MB_MODE_INFO *mbmi,
458                                              BLOCK_SIZE bsize, int mode_cost) {
459   int total_rate = mode_cost;
460   const ModeCosts *mode_costs = &x->mode_costs;
461   const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
462   const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
463   const int use_intrabc = mbmi->use_intrabc;
464   // Can only activate one mode.
465   assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
466           use_filter_intra) <= 1);
467   const int try_palette = av1_allow_palette(
468       cpi->common.features.allow_screen_content_tools, mbmi->bsize);
469   if (try_palette && mbmi->mode == DC_PRED) {
470     const MACROBLOCKD *xd = &x->e_mbd;
471     const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
472     const int mode_ctx = av1_get_palette_mode_ctx(xd);
473     total_rate +=
474         mode_costs->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
475     if (use_palette) {
476       const uint8_t *const color_map = xd->plane[0].color_index_map;
477       int block_width, block_height, rows, cols;
478       av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
479                                &cols);
480       const int plt_size = mbmi->palette_mode_info.palette_size[0];
481       int palette_mode_cost =
482           mode_costs
483               ->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
484           write_uniform_cost(plt_size, color_map[0]);
485       uint16_t color_cache[2 * PALETTE_MAX_SIZE];
486       const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
487       palette_mode_cost +=
488           av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
489                                    n_cache, cpi->common.seq_params->bit_depth);
490       palette_mode_cost +=
491           av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
492       total_rate += palette_mode_cost;
493     }
494   }
495   if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
496     total_rate += mode_costs->filter_intra_cost[mbmi->bsize][use_filter_intra];
497     if (use_filter_intra) {
498       total_rate +=
499           mode_costs->filter_intra_mode_cost[mbmi->filter_intra_mode_info
500                                                  .filter_intra_mode];
501     }
502   }
503   if (av1_is_directional_mode(mbmi->mode)) {
504     if (av1_use_angle_delta(bsize)) {
505       total_rate +=
506           mode_costs->angle_delta_cost[mbmi->mode - V_PRED]
507                                       [MAX_ANGLE_DELTA +
508                                        mbmi->angle_delta[PLANE_TYPE_Y]];
509     }
510   }
511   if (av1_allow_intrabc(&cpi->common))
512     total_rate += mode_costs->intrabc_cost[use_intrabc];
513   return total_rate;
514 }
515 
516 /*!\brief Return the rate cost for chroma prediction mode info of intra blocks.
517  *
518  * \callergraph
519  */
intra_mode_info_cost_uv(const AV1_COMP * cpi,const MACROBLOCK * x,const MB_MODE_INFO * mbmi,BLOCK_SIZE bsize,int mode_cost)520 static AOM_INLINE int intra_mode_info_cost_uv(const AV1_COMP *cpi,
521                                               const MACROBLOCK *x,
522                                               const MB_MODE_INFO *mbmi,
523                                               BLOCK_SIZE bsize, int mode_cost) {
524   int total_rate = mode_cost;
525   const ModeCosts *mode_costs = &x->mode_costs;
526   const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
527   const UV_PREDICTION_MODE mode = mbmi->uv_mode;
528   // Can only activate one mode.
529   assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1);
530 
531   const int try_palette = av1_allow_palette(
532       cpi->common.features.allow_screen_content_tools, mbmi->bsize);
533   if (try_palette && mode == UV_DC_PRED) {
534     const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
535     total_rate +=
536         mode_costs->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette];
537     if (use_palette) {
538       const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
539       const int plt_size = pmi->palette_size[1];
540       const MACROBLOCKD *xd = &x->e_mbd;
541       const uint8_t *const color_map = xd->plane[1].color_index_map;
542       int palette_mode_cost =
543           mode_costs
544               ->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
545           write_uniform_cost(plt_size, color_map[0]);
546       uint16_t color_cache[2 * PALETTE_MAX_SIZE];
547       const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
548       palette_mode_cost += av1_palette_color_cost_uv(
549           pmi, color_cache, n_cache, cpi->common.seq_params->bit_depth);
550       palette_mode_cost +=
551           av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
552       total_rate += palette_mode_cost;
553     }
554   }
555   if (av1_is_directional_mode(get_uv_mode(mode))) {
556     if (av1_use_angle_delta(bsize)) {
557       total_rate +=
558           mode_costs->angle_delta_cost[mode - V_PRED]
559                                       [mbmi->angle_delta[PLANE_TYPE_UV] +
560                                        MAX_ANGLE_DELTA];
561     }
562   }
563   return total_rate;
564 }
565 
566 /*!\cond */
567 // Makes a quick intra prediction and estimate the rdcost with a model without
568 // going through the whole txfm/quantize/itxfm process.
intra_model_rd(const AV1_COMMON * cm,MACROBLOCK * const x,int plane,BLOCK_SIZE plane_bsize,TX_SIZE tx_size,int use_hadamard)569 static int64_t intra_model_rd(const AV1_COMMON *cm, MACROBLOCK *const x,
570                               int plane, BLOCK_SIZE plane_bsize,
571                               TX_SIZE tx_size, int use_hadamard) {
572   MACROBLOCKD *const xd = &x->e_mbd;
573   const BitDepthInfo bd_info = get_bit_depth_info(xd);
574   int row, col;
575   assert(!is_inter_block(xd->mi[0]));
576   const int stepr = tx_size_high_unit[tx_size];
577   const int stepc = tx_size_wide_unit[tx_size];
578   const int txbw = tx_size_wide[tx_size];
579   const int txbh = tx_size_high[tx_size];
580   const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
581   const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
582   int64_t satd_cost = 0;
583   struct macroblock_plane *p = &x->plane[plane];
584   struct macroblockd_plane *pd = &xd->plane[plane];
585   // Prediction.
586   for (row = 0; row < max_blocks_high; row += stepr) {
587     for (col = 0; col < max_blocks_wide; col += stepc) {
588       av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
589       // Here we use p->src_diff and p->coeff as temporary buffers for
590       // prediction residue and transform coefficients. The buffers are only
591       // used in this for loop, therefore we don't need to properly add offset
592       // to the buffers.
593       av1_subtract_block(
594           bd_info, txbh, txbw, p->src_diff, block_size_wide[plane_bsize],
595           p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride,
596           pd->dst.buf + (((row * pd->dst.stride) + col) << 2), pd->dst.stride);
597       av1_quick_txfm(use_hadamard, tx_size, bd_info, p->src_diff,
598                      block_size_wide[plane_bsize], p->coeff);
599       satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]);
600     }
601   }
602   return satd_cost;
603 }
604 /*!\endcond */
605 
606 /*!\brief Estimate the luma rdcost of a given intra mode and try to prune it.
607  *
608  * \ingroup intra_mode_search
609  * \callergraph
610  * This function first makes a quick luma prediction and estimates the rdcost
611  * with a model without going through the txfm, then try to prune the current
612  * mode if the new estimate y_rd > 1.25 * best_model_rd.
613  *
614  * \return Returns 1 if the given mode is prune; 0 otherwise.
615  */
model_intra_yrd_and_prune(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int64_t * best_model_rd)616 static AOM_INLINE int model_intra_yrd_and_prune(const AV1_COMP *const cpi,
617                                                 MACROBLOCK *x, BLOCK_SIZE bsize,
618                                                 int64_t *best_model_rd) {
619   const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
620   const int plane = 0;
621   const AV1_COMMON *cm = &cpi->common;
622   const int64_t this_model_rd =
623       intra_model_rd(cm, x, plane, bsize, tx_size, /*use_hadamard=*/1);
624   if (*best_model_rd != INT64_MAX &&
625       this_model_rd > *best_model_rd + (*best_model_rd >> 2)) {
626     return 1;
627   } else if (this_model_rd < *best_model_rd) {
628     *best_model_rd = this_model_rd;
629   }
630   return 0;
631 }
632 
633 #ifdef __cplusplus
634 }  // extern "C"
635 #endif
636 
637 #endif  // AOM_AV1_ENCODER_INTRA_MODE_SEARCH_UTILS_H_
638