• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "av1/common/av1_common_int.h"
23 #include "av1/common/cfl.h"
24 #include "av1/common/reconintra.h"
25 
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// Bit 0 is not used; the flags start at bit 1.
enum {
  NEED_LEFT = 0x02,        // 1 << 1
  NEED_ABOVE = 0x04,       // 1 << 2
  NEED_ABOVERIGHT = 0x08,  // 1 << 3
  NEED_ABOVELEFT = 0x10,   // 1 << 4
  NEED_BOTTOMLEFT = 0x20,  // 1 << 5
};

// Sizing constants for the intra edge filter / upsampling code and for the
// neighbour-pixel buffers. Their users are outside this part of the file.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38 
39 static const uint8_t extend_modes[INTRA_MODES] = {
40   NEED_ABOVE | NEED_LEFT,                   // DC
41   NEED_ABOVE,                               // V
42   NEED_LEFT,                                // H
43   NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47   NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48   NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49   NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53 };
54 
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are read-only lookup data (they are only reached through
// pointers to const), so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127 
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129   // 4X4
130   has_tr_4x4,
131   // 4X8,       8X4,            8X8
132   has_tr_4x8, has_tr_8x4, has_tr_8x8,
133   // 8X16,      16X8,           16X16
134   has_tr_8x16, has_tr_16x8, has_tr_16x16,
135   // 16X32,     32X16,          32X32
136   has_tr_16x32, has_tr_32x16, has_tr_32x32,
137   // 32X64,     64X32,          64X64
138   has_tr_32x64, has_tr_64x32, has_tr_64x64,
139   // 64x128,    128x64,         128x128
140   has_tr_64x128, has_tr_128x64, has_tr_128x128,
141   // 4x16,      16x4,            8x32
142   has_tr_4x16, has_tr_16x4, has_tr_8x32,
143   // 32x8,      16x64,           64x16
144   has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146 
// Top-right availability bitmaps for square blocks visited in the
// PARTITION_VERT_A / PARTITION_VERT_B order. Same bit packing as the
// ordinary has_tr_* tables. Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156 
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167   // 4X4
168   NULL,
169   // 4X8,      8X4,         8X8
170   has_tr_4x8, NULL, has_tr_vert_8x8,
171   // 8X16,     16X8,        16X16
172   has_tr_8x16, NULL, has_tr_vert_16x16,
173   // 16X32,    32X16,       32X32
174   has_tr_16x32, NULL, has_tr_vert_32x32,
175   // 32X64,    64X32,       64X64
176   has_tr_32x64, NULL, has_tr_vert_64x64,
177   // 64x128,   128x64,      128x128
178   has_tr_64x128, NULL, has_tr_128x128
179 };
180 
// Returns the packed top-right availability bitmap to use for a block of
// size `bsize` coded under `partition`.
//
// Mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are looked
// up in has_tr_vert_tables, every other partition in has_tr_tables. The
// selected entry is asserted to be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;

  if (!is_mixed_vert) {
    const uint8_t *const table = has_tr_tables[bsize];
    assert(table);
    return table;
  }

  // has_tr_vert_tables only has BLOCK_SIZES entries.
  assert(bsize < BLOCK_SIZES);
  const uint8_t *const vert_table = has_tr_vert_tables[bsize];
  assert(vert_table);
  return vert_table;
}
194 
// Returns 1 if the top-right reference pixels of a transform block are
// available for intra prediction, 0 otherwise.
//
//   sb_size, bsize   superblock size and size of the enclosing coding block
//   mi_row, mi_col   position of the coding block, in mode-info units
//   top_available,
//   right_available  caller-provided availability of the rows above / the
//                    columns to the right; if either is 0 the answer is 0
//   partition        partition type that produced the coding block
//   txsz             transform size of the block being predicted
//   row_off, col_off transform block offset inside the coding block, in the
//                    same units as tx_size_wide_unit[]
//   ss_x, ss_y       chroma subsampling shifts of the current plane
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int tx_w_units = tx_size_wide_unit[txsz];
  const int blk_w_units = AOMMAX(mi_size_wide[bsize] >> ss_x, 1);
  const int tx_right_edge = col_off + tx_w_units;

  if (row_off > 0) {
    // Not in the first transform row of the block: the pixels needed lie
    // inside the current block, so only the horizontal extent matters.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tx_right_edge < blk_w_units;
    }

    // Blocks wider than 64 are handled per 64-wide unit.
    const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: For 128x128 blocks, the transform unit whose top-right
    // corner is at the center of the block does in fact have pixels
    // available at its top-right corner.
    if (row_off == h64_units && tx_right_edge == w64_units) return 1;

    return (col_off % w64_units) + tx_w_units < w64_units;
  }

  // First transform row. If the top-right pixels still sit above the current
  // block they belong to the block above, which is already available.
  if (tx_right_edge < blk_w_units) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Top row of the superblock: the pixels are in the top and/or top-right
  // superblocks, both of which are already available.
  if (sb_blk_row == 0) return 1;

  // Rightmost column of the superblock (and not the top row): the pixels
  // fall in the right superblock, which is not available yet.
  if (((sb_blk_col + 1) << bw_log2) >= sb_mi) return 0;

  // General case: consult the coding-order bitmap to see whether the
  // top-right block is coded before the current one.
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
250 
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. One bit per block, packed into 8-bit entries and
// read as (table[idx / 8] >> (idx % 8)) & 1.
//
// The tables are read-only lookup data (they are only reached through
// pointers to const), so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312 
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314   // 4X4
315   has_bl_4x4,
316   // 4X8,         8X4,         8X8
317   has_bl_4x8, has_bl_8x4, has_bl_8x8,
318   // 8X16,        16X8,        16X16
319   has_bl_8x16, has_bl_16x8, has_bl_16x16,
320   // 16X32,       32X16,       32X32
321   has_bl_16x32, has_bl_32x16, has_bl_32x32,
322   // 32X64,       64X32,       64X64
323   has_bl_32x64, has_bl_64x32, has_bl_64x64,
324   // 64x128,      128x64,      128x128
325   has_bl_64x128, has_bl_128x64, has_bl_128x128,
326   // 4x16,        16x4,        8x32
327   has_bl_4x16, has_bl_16x4, has_bl_8x32,
328   // 32x8,        16x64,       64x16
329   has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331 
// Bottom-left availability bitmaps for square blocks visited in the
// PARTITION_VERT_A / PARTITION_VERT_B order. Same bit packing as the
// ordinary has_bl_* tables. Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341 
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352   // 4X4
353   NULL,
354   // 4X8,     8X4,         8X8
355   has_bl_4x8, NULL, has_bl_vert_8x8,
356   // 8X16,    16X8,        16X16
357   has_bl_8x16, NULL, has_bl_vert_16x16,
358   // 16X32,   32X16,       32X32
359   has_bl_16x32, NULL, has_bl_vert_32x32,
360   // 32X64,   64X32,       64X64
361   has_bl_32x64, NULL, has_bl_vert_64x64,
362   // 64x128,  128x64,      128x128
363   has_bl_64x128, NULL, has_bl_128x128
364 };
365 
// Returns the packed bottom-left availability bitmap to use for a block of
// size `bsize` coded under `partition`.
//
// Mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are looked
// up in has_bl_vert_tables, every other partition in has_bl_tables. The
// selected entry is asserted to be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;

  if (!is_mixed_vert) {
    const uint8_t *const table = has_bl_tables[bsize];
    assert(table);
    return table;
  }

  // has_bl_vert_tables only has BLOCK_SIZES entries.
  assert(bsize < BLOCK_SIZES);
  const uint8_t *const vert_table = has_bl_vert_tables[bsize];
  assert(vert_table);
  return vert_table;
}
379 
// Returns 1 if the bottom-left reference pixels of a transform block are
// available for intra prediction, 0 otherwise.
//
//   sb_size, bsize    superblock size and size of the enclosing coding block
//   mi_row, mi_col    position of the coding block, in mode-info units
//   bottom_available,
//   left_available    caller-provided availability of the rows below / the
//                     columns to the left; if either is 0 the answer is 0
//   partition         partition type that produced the coding block
//   txsz              transform size of the block being predicted
//   row_off, col_off  transform block offset inside the coding block, in the
//                     same units as tx_size_high_unit[]
//   ss_x, ss_y        chroma subsampling shifts of the current plane
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  const int tx_h_units = tx_size_high_unit[txsz];

  if (col_off > 0) {
    // Special case for 128x* blocks, when col_off is half the block width.
    // This is needed because 128x* superblocks are divided into 64x* blocks
    // in raster order.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_units = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_units == 0) {
        // Left edge of the top-right or bottom-right 64x* block: available
        // only if every bottom-left pixel lies in the left 64x* block, which
        // is already coded.
        const int h64_units = mi_size_high[BLOCK_64X64] >> ss_y;
        const int blk_h_units = AOMMIN(mi_size_high[bsize] >> ss_y, h64_units);
        return (row_off % h64_units) + tx_h_units < blk_h_units;
      }
    }
    // Otherwise the pixels are in the bottom-left block, which is not
    // available.
    return 0;
  }

  // Leftmost transform column. If the pixels needed are still beside the
  // current block they belong to the left block, which is already available.
  const int blk_h_units = AOMMAX(mi_size_high[bsize] >> ss_y, 1);
  if (row_off + tx_h_units < blk_h_units) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblocks, but only the left one is available, so check
  // that everything needed falls inside its height.
  if (sb_blk_col == 0) {
    const int blk_first_row = (sb_blk_row << bh_log2) >> ss_y;
    const int sb_h_units = sb_mi >> ss_y;
    return blk_first_row + row_off + tx_h_units < sb_h_units;
  }

  // Bottom row of the superblock (and not the leftmost column): the pixels
  // fall in the bottom superblock, which is not available yet.
  if (((sb_blk_row + 1) << bh_log2) >= sb_mi) return 0;

  // General case: consult the coding-order bitmap to see whether the
  // bottom-left block is coded before the current one.
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
448 
449 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450                               const uint8_t *above, const uint8_t *left);
451 
452 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454 
455 #if CONFIG_AV1_HIGHBITDEPTH
456 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457                                    const uint16_t *above, const uint16_t *left,
458                                    int bd);
459 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461 #endif
462 
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465 
466 #if CONFIG_REALTIME_ONLY
467 #define INIT_RECTANGULAR(p, type)             \
468   p[TX_4X8] = aom_##type##_predictor_4x8;     \
469   p[TX_8X4] = aom_##type##_predictor_8x4;     \
470   p[TX_8X16] = aom_##type##_predictor_8x16;   \
471   p[TX_16X8] = aom_##type##_predictor_16x8;   \
472   p[TX_16X32] = aom_##type##_predictor_16x32; \
473   p[TX_32X16] = aom_##type##_predictor_32x16; \
474   p[TX_32X64] = aom_##type##_predictor_32x64; \
475   p[TX_64X32] = aom_##type##_predictor_64x32;
476 #else
477 #define INIT_RECTANGULAR(p, type)             \
478   p[TX_4X8] = aom_##type##_predictor_4x8;     \
479   p[TX_8X4] = aom_##type##_predictor_8x4;     \
480   p[TX_8X16] = aom_##type##_predictor_8x16;   \
481   p[TX_16X8] = aom_##type##_predictor_16x8;   \
482   p[TX_16X32] = aom_##type##_predictor_16x32; \
483   p[TX_32X16] = aom_##type##_predictor_32x16; \
484   p[TX_32X64] = aom_##type##_predictor_32x64; \
485   p[TX_64X32] = aom_##type##_predictor_64x32; \
486   p[TX_4X16] = aom_##type##_predictor_4x16;   \
487   p[TX_16X4] = aom_##type##_predictor_16x4;   \
488   p[TX_8X32] = aom_##type##_predictor_8x32;   \
489   p[TX_32X8] = aom_##type##_predictor_32x8;   \
490   p[TX_16X64] = aom_##type##_predictor_16x64; \
491   p[TX_64X16] = aom_##type##_predictor_64x16;
492 #endif
493 
494 #define INIT_NO_4X4(p, type)                  \
495   p[TX_8X8] = aom_##type##_predictor_8x8;     \
496   p[TX_16X16] = aom_##type##_predictor_16x16; \
497   p[TX_32X32] = aom_##type##_predictor_32x32; \
498   p[TX_64X64] = aom_##type##_predictor_64x64; \
499   INIT_RECTANGULAR(p, type)
500 
501 #define INIT_ALL_SIZES(p, type)           \
502   p[TX_4X4] = aom_##type##_predictor_4x4; \
503   INIT_NO_4X4(p, type)
504 
505   INIT_ALL_SIZES(pred[V_PRED], v);
506   INIT_ALL_SIZES(pred[H_PRED], h);
507   INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
508   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
509   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
510   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
511   INIT_ALL_SIZES(dc_pred[0][0], dc_128);
512   INIT_ALL_SIZES(dc_pred[0][1], dc_top);
513   INIT_ALL_SIZES(dc_pred[1][0], dc_left);
514   INIT_ALL_SIZES(dc_pred[1][1], dc);
515 #if CONFIG_AV1_HIGHBITDEPTH
516   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
517   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
518   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
519   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
520   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
521   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
522   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
523   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
524   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
525   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
526 #endif
527 #undef intra_pred_allsizes
528 }
529 
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block from the `above` edge only. For each row the
// sampling position moves right by another `dx` (in 1/64 sample units, or
// 1/32 when upsample_above is set), and each output pixel is a 5-bit-weighted
// blend of two adjacent edge samples. Positions at or beyond the last usable
// edge sample, above[max_base_x], are filled with that sample.
// `left` and `dy` are unused; dy must be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_step = 1 << upsample_above;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int x = (r + 1) * dx;
    int base = x >> frac_bits;

    if (base >= max_base_x) {
      // This row and all rows below start past the edge: flat fill.
      for (int rest = r; rest < bh; ++rest, row += stride) {
        memset(row, above[max_base_x], bw * sizeof(row[0]));
      }
      return;
    }

    const int frac = ((x << upsample_above) & 0x3F) >> 1;
    int c = 0;
    // Interpolated part of the row.
    for (; c < bw && base < max_base_x; ++c, base += base_step) {
      const int sum = above[base] * (32 - frac) + above[base + 1] * frac;
      row[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
    // Remainder of the row lies past the edge.
    for (; c < bw; ++c) row[c] = above[max_base_x];
  }
}
567 
// Directional prediction, zone 2: 90 < angle < 180.
//
// Each pixel is projected onto the `above` edge first. If the projection
// falls left of the first usable above sample (index min_base_x), the pixel
// is projected onto the `left` edge instead. Either way the result is a
// 5-bit-weighted blend of two adjacent edge samples. Both edges are read at
// negative indices, so the buffers must be addressable there.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    const int row_shift_x = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_shift_x;
      const int base_x = x >> frac_bits_x;
      int sum;
      if (base_x < min_base_x) {
        // Projection misses the above edge: use the left edge.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int frac = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        sum = left[base_y] * (32 - frac) + left[base_y + 1] * frac;
      } else {
        const int frac = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        sum = above[base_x] * (32 - frac) + above[base_x + 1] * frac;
      }
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
606 
// Directional prediction, zone 3: 180 < angle < 270.
//
// Column-wise counterpart of zone 1: predicts from the `left` edge only. For
// each column the sampling position moves down by another `dy`, and each
// output pixel is a 5-bit-weighted blend of two adjacent edge samples.
// Positions at or beyond left[max_base_y] are filled with that sample.
// `above` and `dx` are unused; dx must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_step = 1 << upsample_left;

  for (int c = 0; c < bw; ++c) {
    const int y = (c + 1) * dy;
    const int frac = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;

    int r = 0;
    // Interpolated part of the column.
    for (; r < bh && base < max_base_y; ++r, base += base_step) {
      const int sum = left[base] * (32 - frac) + left[base + 1] * frac;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(sum, 5);
    }
    // Remainder of the column lies past the edge.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
638 
// Dispatches an 8-bit directional prediction of angle `angle` (degrees,
// 0 < angle < 270) for a block of size `tx_size`.
//
// 90 and 180 degrees go to the plain vertical / horizontal predictors; every
// other angle goes to the zone 1, 2 or 3 kernel with the dx / dy steps
// returned by av1_get_dx() / av1_get_dy().
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
663 
664 #if CONFIG_AV1_HIGHBITDEPTH
// Directional prediction, zone 1: 0 < angle < 90 (high bitdepth).
//
// 16-bit counterpart of av1_dr_prediction_z1_c: predicts a bw x bh block from
// the `above` edge only. Each output pixel is a 5-bit-weighted blend of two
// adjacent edge samples, and positions at or beyond above[max_base_x] are
// filled with that sample. `left`, `dy` and `bd` are unused; dy must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_step = 1 << upsample_above;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    const int x = (r + 1) * dx;
    int base = x >> frac_bits;

    if (base >= max_base_x) {
      // This row and all rows below start past the edge: flat fill.
      for (int rest = r; rest < bh; ++rest, row += stride) {
        aom_memset16(row, above[max_base_x], bw);
      }
      return;
    }

    const int frac = ((x << upsample_above) & 0x3F) >> 1;
    int c = 0;
    // Interpolated part of the row.
    for (; c < bw && base < max_base_x; ++c, base += base_step) {
      const int sum = above[base] * (32 - frac) + above[base + 1] * frac;
      row[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
    // Remainder of the row lies past the edge.
    for (; c < bw; ++c) row[c] = above[max_base_x];
  }
}
704 
// Directional prediction, zone 2: 90 < angle < 180 (high bitdepth).
//
// 16-bit counterpart of av1_dr_prediction_z2_c. Each pixel is projected onto
// the `above` edge first. If the projection falls left of the first usable
// above sample (index min_base_x), the pixel is projected onto the `left`
// edge instead. Both edges are read at negative indices. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    const int row_shift_x = (r + 1) * dx;
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - row_shift_x;
      const int base_x = x >> frac_bits_x;
      int sum;
      if (base_x < min_base_x) {
        // Projection misses the above edge: use the left edge.
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int frac = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        sum = left[base_y] * (32 - frac) + left[base_y + 1] * frac;
      } else {
        const int frac = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        sum = above[base_x] * (32 - frac) + above[base_x + 1] * frac;
      }
      dst[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
744 
// Directional prediction, zone 3: 180 < angle < 270
//
// High-bitdepth predictor that samples only the `left` edge. Column c starts
// at edge position (c + 1) * dy, in 1/64 sample units, and advances by one
// whole source sample (two entries when the edge is upsampled) per output
// row. Pixels are a 5-bit-weighted blend of two adjacent edge entries; once
// the position reaches the last usable entry the remainder of the column is
// filled with that entry. `above`, `dx` and `bd` are unused; `dx` is asserted
// to be 1 and `dy` to be positive.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  for (int col = 0; col < bw; ++col) {
    const int pos = (col + 1) * dy;
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> pos_shift;
    int row = 0;

    // Interpolated part of the column.
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int blend = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
    }
    // Whatever is left lies past the edge: replicate the final entry.
    for (; row < bh; ++row) {
      dst[row * stride + col] = left[last_idx];
    }
  }
}
777 
// Routes a high-bitdepth directional prediction to the right kernel for
// `angle` (asserted to lie strictly between 0 and 270 degrees):
//   exactly 90  -> the V_PRED entry of pred_high
//   exactly 180 -> the H_PRED entry of pred_high
//   (0, 90)     -> zone 1, (90, 180) -> zone 2, (180, 270) -> zone 3
// The per-angle steps dx/dy come from av1_get_dx()/av1_get_dy() and the block
// dimensions from tx_size. Any other angle leaves dst untouched.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle > 0 && angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
    return;
  }
  if (angle > 90 && angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
    return;
  }
  if (angle > 180 && angle < 270) {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
803 #endif  // CONFIG_AV1_HIGHBITDEPTH
804 
// Filter-intra tap weights, indexed as [mode][k][tap].
//
// As used by the C filter-intra predictors in this file:
//   - k (0..7) selects one output pixel of a 4x2 patch: row offset k >> 2,
//     column offset k & 3.
//   - taps 0..6 multiply the seven neighbours p0..p6 of that patch (the
//     above-left sample, the four samples above, and the two samples to the
//     left).
//   - tap 7 is 0 in every row and is not read by those predictors.
//     NOTE(review): presumably padding for SIMD implementations - confirm.
// The weighted sum is rounded by FILTER_INTRA_SCALE_BITS and clipped by the
// predictors.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
858 
// 8-bit filter-intra predictor.
//
// The block is produced in 4x2 patches, scanned left-to-right and
// top-to-bottom inside a scratch grid whose row 0 / column 0 hold the above
// edge (starting at above[-1]) and the left edge. Every patch is computed
// from seven neighbours - the sample above-left of it, the four samples
// directly above it and the two samples directly to its left - so patches
// further right or down consume the output of earlier patches. The finished
// interior of the grid is then copied to dst.
//
// `mode` selects the tap set in av1_filter_intra_taps. The block is asserted
// to be at most 32x32.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint8_t grid[33][33];

  assert(bw <= 32 && bh <= 32);

  // Seed the border: top row from above[-1 .. bw-1], first column from left.
  memcpy(grid[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) grid[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours are captured before any pixel of this patch is written.
      const uint8_t nbr[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * nbr[t];
        // Section 7.11.2.3 of the spec writes this as
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Clip1() maps every negative value to 0, so plain Round2() gives the
        // same result.
        grid[row + (k >> 2)][col + (k & 0x03)] =
            clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  uint8_t *out = dst;
  for (int row = 1; row <= bh; ++row) {
    memcpy(out, &grid[row][1], bw * sizeof(uint8_t));
    out += stride;
  }
}
905 
906 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth filter-intra predictor.
//
// Works on 4x2 patches inside a scratch grid whose row 0 / column 0 hold the
// above edge (starting at above[-1]) and the left edge. Each patch is derived
// from seven neighbours (above-left, four above, two left), so later patches
// build on earlier output. Results are clipped to `bd` bits and the interior
// of the grid is finally copied to dst.
//
// `mode` selects the tap set in av1_filter_intra_taps. The block is asserted
// to be at most 32x32.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint16_t grid[33][33];

  assert(bw <= 32 && bh <= 32);

  // Seed the border: top row from above[-1 .. bw-1], first column from left.
  memcpy(grid[0], &above[-1], (bw + 1) * sizeof(grid[0][0]));
  for (int row = 0; row < bh; ++row) grid[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours are captured before any pixel of this patch is written.
      const uint16_t nbr[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * nbr[t];
        // Section 7.11.2.3 of the spec writes this as
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Clip1() maps every negative value to 0, so plain Round2() gives the
        // same result.
        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  uint16_t *out = dst;
  for (int row = 1; row <= bh; ++row) {
    memcpy(out, &grid[row][1], bw * sizeof(dst[0]));
    out += stride;
  }
}
955 #endif  // CONFIG_AV1_HIGHBITDEPTH
956 
is_smooth(const MB_MODE_INFO * mbmi,int plane)957 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
958   if (plane == 0) {
959     const PREDICTION_MODE mode = mbmi->mode;
960     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
961             mode == SMOOTH_H_PRED);
962   } else {
963     // uv_mode is not set for inter blocks, so need to explicitly
964     // detect that case.
965     if (is_inter_block(mbmi)) return 0;
966 
967     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
968     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
969             uv_mode == UV_SMOOTH_H_PRED);
970   }
971 }
972 
get_intra_edge_filter_type(const MACROBLOCKD * xd,int plane)973 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
974   int ab_sm, le_sm;
975 
976   if (plane == 0) {
977     const MB_MODE_INFO *ab = xd->above_mbmi;
978     const MB_MODE_INFO *le = xd->left_mbmi;
979     ab_sm = ab ? is_smooth(ab, plane) : 0;
980     le_sm = le ? is_smooth(le, plane) : 0;
981   } else {
982     const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
983     const MB_MODE_INFO *le = xd->chroma_left_mbmi;
984     ab_sm = ab ? is_smooth(ab, plane) : 0;
985     le_sm = le ? is_smooth(le, plane) : 0;
986   }
987 
988   return (ab_sm || le_sm) ? 1 : 0;
989 }
990 
// Picks the intra edge filter strength (0 = no filtering, up to 3).
//
// The decision depends on the sum of the two block dimensions (bs0 + bs1),
// on the magnitude of `delta`, and on `type`: type 0 uses one threshold
// table, any other value uses a second table that reaches a given strength
// at smaller blocks and angles.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
    if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return (mag >= 8) ? 1 : 0;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return (mag >= 1) ? 1 : 0;
    }
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return (mag >= 40) ? 1 : 0;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return (mag >= 20) ? 1 : 0;
  }
  if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
  return (mag >= 1) ? 3 : 0;
}
1029 
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1030 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1031   if (!strength) return;
1032 
1033   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1034                                                          { 0, 5, 6, 5, 0 },
1035                                                          { 2, 4, 4, 4, 2 } };
1036   const int filt = strength - 1;
1037   uint8_t edge[129];
1038 
1039   memcpy(edge, p, sz * sizeof(*p));
1040   for (int i = 1; i < sz; i++) {
1041     int s = 0;
1042     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1043       int k = i - 2 + j;
1044       k = (k < 0) ? 0 : k;
1045       k = (k > sz - 1) ? sz - 1 : k;
1046       s += edge[k] * kernel[filt][j];
1047     }
1048     s = (s + 8) >> 4;
1049     p[i] = s;
1050   }
1051 }
1052 
// Smooths the shared above-left corner sample of the two 8-bit edges.
//
// The new corner is the rounded {5, 6, 5}/16 weighted average of the first
// left sample, the current corner (p_above[-1]) and the first above sample.
// It is stored into both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1062 
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1063 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1064   if (!strength) return;
1065 
1066   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1067                                                          { 0, 5, 6, 5, 0 },
1068                                                          { 2, 4, 4, 4, 2 } };
1069   const int filt = strength - 1;
1070   uint16_t edge[129];
1071 
1072   memcpy(edge, p, sz * sizeof(*p));
1073   for (int i = 1; i < sz; i++) {
1074     int s = 0;
1075     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1076       int k = i - 2 + j;
1077       k = (k < 0) ? 0 : k;
1078       k = (k > sz - 1) ? sz - 1 : k;
1079       s += edge[k] * kernel[filt][j];
1080     }
1081     s = (s + 8) >> 4;
1082     p[i] = s;
1083   }
1084 }
1085 
1086 #if CONFIG_AV1_HIGHBITDEPTH
// Smooths the shared above-left corner sample of the two 16-bit edges.
//
// The new corner is the rounded {5, 6, 5}/16 weighted average of the first
// left sample, the current corner (p_above[-1]) and the first above sample.
// It is stored into both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int side_weight = 5;
  const int corner_weight = 6;

  const int weighted = side_weight * p_left[0] + corner_weight * p_above[-1] +
                       side_weight * p_above[0];
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1096 #endif
1097 
av1_upsample_intra_edge_c(uint8_t * p,int sz)1098 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1099   // interpolate half-sample positions
1100   assert(sz <= MAX_UPSAMPLE_SZ);
1101 
1102   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1103   // copy p[-1..(sz-1)] and extend first and last samples
1104   in[0] = p[-1];
1105   in[1] = p[-1];
1106   for (int i = 0; i < sz; i++) {
1107     in[i + 2] = p[i];
1108   }
1109   in[sz + 2] = p[sz - 1];
1110 
1111   // interpolate half-sample edge positions
1112   p[-2] = in[0];
1113   for (int i = 0; i < sz; i++) {
1114     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1115     s = clip_pixel((s + 8) >> 4);
1116     p[2 * i - 1] = s;
1117     p[2 * i] = in[i + 2];
1118   }
1119 }
1120 
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1121 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1122   // interpolate half-sample positions
1123   assert(sz <= MAX_UPSAMPLE_SZ);
1124 
1125   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1126   // copy p[-1..(sz-1)] and extend first and last samples
1127   in[0] = p[-1];
1128   in[1] = p[-1];
1129   for (int i = 0; i < sz; i++) {
1130     in[i + 2] = p[i];
1131   }
1132   in[sz + 2] = p[sz - 1];
1133 
1134   // interpolate half-sample edge positions
1135   p[-2] = in[0];
1136   for (int i = 0; i < sz; i++) {
1137     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1138     s = (s + 8) >> 4;
1139     s = clip_pixel_highbd(s, bd);
1140     p[2 * i - 1] = s;
1141     p[2 * i] = in[i + 2];
1142   }
1143 }
1144 #if CONFIG_AV1_HIGHBITDEPTH
// Builds the high-bitdepth intra prediction for one transform block.
//
// Steps, in order:
//   1. Work out which neighbouring edges the mode needs (from extend_modes,
//      overridden for directional modes by the prediction angle and for
//      filter-intra by "everything").
//   2. If a required edge has no available pixels and the other edge is not
//      needed, fill the whole block with a single value and return.
//   3. Copy the available left / above pixels into local padded buffers,
//      replicating the last available pixel (or borrowing from the other
//      edge) where pixels are missing, and set the above-left corner.
//   4. Dispatch: filter-intra predictor, directional predictor (after the
//      optional edge filter / upsample), DC predictor, or the generic
//      pred_high table.
//
// ref8 / dst8 are converted with CONVERT_TO_SHORTPTR before use; `ref`
// addresses the top-left pixel of the block, so the above row is at
// ref - ref_stride and the left column at ref - 1.
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px are the numbers of
// pixels that may be read from the corresponding neighbour (all asserted to
// be non-negative).
// filter_intra_mode == FILTER_INTRA_MODES means filter-intra is not used.
static void build_intra_predictors_high(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
    int bit_depth) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // The edges start 16 entries into their buffers so that negative indices
  // (above-left corner, upsampled samples) stay inside the arrays.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey for the current bit depth (128 at 8 bits).
  int base = 128 << (bit_depth - 8);
  // The left_data, above_data buffers must be fully initialized to fix some
  // intermittent valgrind errors. Uninitialized reads in intra pred modules
  // (e.g. width = 4 path in av1_highbd_dr_prediction_z2_avx2()) from
  // left_data, above_data are seen to be the potential reason for this issue.
  // They are filled with the "unavailable" defaults (base + 1 / base - 1)
  // rather than with zero.
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // Directional modes derive the needed edges from the final angle instead of
  // from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and it is unavailable: the prediction is flat.
  // Use the first pixel of the other edge if that one exists, otherwise the
  // default for the missing edge.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy what is available, then replicate the last copied pixel down to
      // the required length.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      // No left pixels at all: borrow the first above pixel.
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy what is available, then replicate the last copied pixel out to
      // the required length.
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      // No above pixels at all: borrow the first left pixel.
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Above-left corner: the real corner pixel when both edges exist, else the
  // first pixel of whichever edge exists, else mid-grey. Both buffers carry
  // the same corner value at index -1.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, bit_depth);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Exactly vertical / horizontal predictions skip the edge smoothing.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          // The filtered span starts at the corner sample when it is in use.
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, bit_depth);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is chosen by which edges are actually available.
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, bit_depth);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
  }
}
1326 #endif  // CONFIG_AV1_HIGHBITDEPTH
1327 
// Builds the 8-bit intra prediction for one transform block.
//
// Steps, in order:
//   1. Work out which neighbouring edges the mode needs (from extend_modes,
//      overridden for directional modes by the prediction angle and for
//      filter-intra by "everything").
//   2. If a required edge has no available pixels and the other edge is not
//      needed, fill the whole block with a single value and return.
//   3. Copy the available left / above pixels into local padded buffers,
//      replicating the last available pixel (or borrowing from the other
//      edge) where pixels are missing, and set the above-left corner.
//   4. Dispatch: filter-intra predictor, directional predictor (after the
//      optional edge filter / upsample), DC predictor, or the generic pred
//      table.
//
// `ref` addresses the top-left pixel of the block, so the above row is at
// ref - ref_stride and the left column at ref - 1.
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px are the numbers of
// pixels that may be read from the corresponding neighbour (all asserted to
// be non-negative).
// filter_intra_mode == FILTER_INTRA_MODES means filter-intra is not used.
static void build_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // The edges start 16 entries into their buffers so that negative indices
  // (above-left corner, upsampled samples) stay inside the arrays.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // The left_data, above_data buffers must be fully initialized to fix some
  // intermittent valgrind errors. Uninitialized reads in intra pred modules
  // (e.g. width = 4 path in av1_dr_prediction_z1_avx2()) from left_data,
  // above_data are seen to be the potential reason for this issue. They are
  // filled with the "unavailable" defaults (129 / 127) rather than with zero.
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // Directional modes derive the needed edges from the final angle instead of
  // from the extend_modes table.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and it is unavailable: the prediction is flat.
  // Use the first pixel of the other edge if that one exists, otherwise the
  // default for the missing edge.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Copy what is available, then replicate the last copied pixel down to
      // the required length.
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      // No left pixels at all: borrow the first above pixel.
      memset(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      // Copy what is available, then replicate the last copied pixel out to
      // the required length.
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      // No above pixels at all: borrow the first left pixel.
      memset(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Above-left corner: the real corner pixel when both edges exist, else the
  // first pixel of whichever edge exists, else 128. Both buffers carry the
  // same corner value at index -1.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Exactly vertical / horizontal predictions skip the edge smoothing.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          // The filtered span starts at the corner sample when it is in use.
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is chosen by which edges are actually available.
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1504 
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1505 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1506                                             int subsampling_y) {
1507   assert(subsampling_x >= 0 && subsampling_x < 2);
1508   assert(subsampling_y >= 0 && subsampling_y < 2);
1509   BLOCK_SIZE bs = bsize;
1510   switch (bsize) {
1511     case BLOCK_4X4:
1512       if (subsampling_x == 1 && subsampling_y == 1)
1513         bs = BLOCK_8X8;
1514       else if (subsampling_x == 1)
1515         bs = BLOCK_8X4;
1516       else if (subsampling_y == 1)
1517         bs = BLOCK_4X8;
1518       break;
1519     case BLOCK_4X8:
1520       if (subsampling_x == 1 && subsampling_y == 1)
1521         bs = BLOCK_8X8;
1522       else if (subsampling_x == 1)
1523         bs = BLOCK_8X8;
1524       else if (subsampling_y == 1)
1525         bs = BLOCK_4X8;
1526       break;
1527     case BLOCK_8X4:
1528       if (subsampling_x == 1 && subsampling_y == 1)
1529         bs = BLOCK_8X8;
1530       else if (subsampling_x == 1)
1531         bs = BLOCK_8X4;
1532       else if (subsampling_y == 1)
1533         bs = BLOCK_8X8;
1534       break;
1535     case BLOCK_4X16:
1536       if (subsampling_x == 1 && subsampling_y == 1)
1537         bs = BLOCK_8X16;
1538       else if (subsampling_x == 1)
1539         bs = BLOCK_8X16;
1540       else if (subsampling_y == 1)
1541         bs = BLOCK_4X16;
1542       break;
1543     case BLOCK_16X4:
1544       if (subsampling_x == 1 && subsampling_y == 1)
1545         bs = BLOCK_16X8;
1546       else if (subsampling_x == 1)
1547         bs = BLOCK_16X4;
1548       else if (subsampling_y == 1)
1549         bs = BLOCK_16X8;
1550       break;
1551     default: break;
1552   }
1553   return bs;
1554 }
1555 
// Produces the intra prediction for a single transform block and writes it
// to dst.
//
// col_off / row_off give the position of the transform block in mode-info
// units; they are shifted by MI_SIZE_LOG2 to obtain the pixel offsets (x, y).
// wpx / hpx are a width and height in pixels: wpx is the row stride of the
// palette colour index map, and both enter the distance-to-frame-edge
// computation below.
//
// When use_palette is non-zero the block is filled directly from the palette
// through the colour index map and the function returns. Otherwise the
// availability of the top, left, top-right and bottom-left neighbours is
// derived and the matching build_intra_predictors*() routine is invoked
// with the number of usable reference pixels on each edge.
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
                             int enable_intra_edge_filter, int wpx, int hpx,
                             TX_SIZE tx_size, PREDICTION_MODE mode,
                             int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode,
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
                             int dst_stride, int col_off, int row_off,
                             int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    // Luma uses index 0 of the map bookkeeping, every other plane index 1.
    const int map_idx = (plane != 0);
    const uint8_t *const map = xd->plane[map_idx].color_index_map +
                               xd->color_index_map_offset[map_idx];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const map_row = map + ((r + y) * wpx + x);
        uint16_t *const out_row = dst16 + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) {
          out_row[c] = palette[map_row[c]];
        }
      }
    } else {
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const map_row = map + ((r + y) * wpx + x);
        uint8_t *const out_row = dst + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) {
          out_row[c] = (uint8_t)palette[map_row[c]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // A transform block that is not in the first row/column of its parent
  // always has a top/left neighbour; otherwise defer to the block-level
  // availability flags for this plane type.
  const int blk_up_available =
      ss_y ? xd->chroma_up_available : xd->up_available;
  const int blk_left_available =
      ss_x ? xd->chroma_left_available : xd->left_available;
  const int have_top = row_off || blk_up_available;
  const int have_left = col_off || blk_left_available;

  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance from the right edge of this prediction block to the frame's
  // right edge.
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
  // Distance from the bottom edge of this prediction block to the frame's
  // bottom edge.
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;

  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // For subsampled planes, 4-pixel block dimensions are scaled up before the
  // neighbour availability lookups.
  const BLOCK_SIZE bsize = (ss_x || ss_y)
                               ? scale_chroma_bsize(mbmi->bsize, ss_x, ss_y)
                               : mbmi->bsize;

  const int have_top_right =
      has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left = has_bottom_left(
      sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
      tx_size, row_off, col_off, ss_x, ss_y);

  // Number of reference pixels usable on each edge, limited by the distance
  // to the frame boundary.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

  const int disable_edge_filter = !enable_intra_edge_filter;
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px,
                                intra_edge_filter_type, xd->bd);
    return;
  }
#endif
  build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, angle_delta,
                         filter_intra_mode, tx_size, disable_edge_filter,
                         n_top_px, n_topright_px, n_left_px, n_bottomleft_px,
                         intra_edge_filter_type);
}
1653 
// Convenience wrapper around av1_predict_intra_block(): gathers the
// prediction parameters for `plane` from the current mode info (xd->mi[0])
// and the sequence header, then predicts the transform block located at
// (blk_col, blk_row) in place -- the plane's destination buffer is passed as
// both the reference and the output.
//
// When a chroma plane uses UV_CFL_PRED, the base prediction is either
// computed (and stored in the CfL context if use_dc_pred_cache is set) or
// loaded from that cache, after which cfl_predict_block() is applied to dst.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const SequenceHeader *seq_params = cm->seq_params;
  const int is_luma = (plane == AOM_PLANE_Y);

  const int dst_stride = pd->dst.stride;
  // blk_col / blk_row are in mode-info units; scale to a pixel offset.
  uint8_t *dst =
      pd->dst.buf + ((blk_row * dst_stride + blk_col) << MI_SIZE_LOG2);

  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  // Filter intra is only signalled for luma; FILTER_INTRA_MODES is passed
  // when it is not in use.
  const FILTER_INTRA_MODE filter_intra_mode =
      (is_luma && mbmi->filter_intra_mode_info.use_filter_intra)
          ? mbmi->filter_intra_mode_info.filter_intra_mode
          : FILTER_INTRA_MODES;
  const int angle_delta = mbmi->angle_delta[!is_luma] * ANGLE_STEP;

  const int is_cfl = !is_luma && mbmi->uv_mode == UV_CFL_PRED;
  if (!is_cfl) {
    // Regular path: a single call does all the work.
    av1_predict_intra_block(xd, seq_params->sb_size,
                            seq_params->enable_intra_edge_filter, pd->width,
                            pd->height, tx_size, mode, angle_delta,
                            use_palette, filter_intra_mode, dst, dst_stride,
                            dst, dst_stride, blk_col, blk_row, plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    // Outside lossless mode the transform block must cover the whole plane
    // block.
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    // Reuse the prediction stored by an earlier call.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(xd, seq_params->sb_size,
                            seq_params->enable_intra_edge_filter, pd->width,
                            pd->height, tx_size, mode, angle_delta,
                            use_palette, filter_intra_mode, dst, dst_stride,
                            dst, dst_stride, blk_col, blk_row, plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1708 
av1_init_intra_predictors(void)1709 void av1_init_intra_predictors(void) {
1710   aom_once(init_intra_predictors_internal);
1711 }
1712