1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "av1/common/av1_common_int.h"
23 #include "av1/common/cfl.h"
24 #include "av1/common/reconintra.h"
25 
// Bit flags naming the neighbouring edge regions an intra predictor may read.
// Every value is a distinct single bit so the flags can be OR-ed together (as
// done in extend_modes). Bit 0 is not assigned to any flag.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};

// Size constants for the intra edge handling code.
// NOTE(review): none of these are referenced in the part of the file reviewed
// here; judging by the names they are the number of edge-filter kernels, the
// taps per kernel, the largest edge that is upsampled, and the length of the
// neighbour-pixel scratch buffers -- confirm at the use sites.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38 
39 static const uint8_t extend_modes[INTRA_MODES] = {
40   NEED_ABOVE | NEED_LEFT,                   // DC
41   NEED_ABOVE,                               // V
42   NEED_LEFT,                                // H
43   NEED_ABOVE | NEED_ABOVERIGHT,             // D45
44   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
45   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
46   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
47   NEED_LEFT | NEED_BOTTOMLEFT,              // D203
48   NEED_ABOVE | NEED_ABOVERIGHT,             // D67
49   NEED_LEFT | NEED_ABOVE,                   // SMOOTH
50   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
51   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
52   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
53 };
54 
// Tables recording whether the top-right reference pixels are available for
// a block, one table per block size. The flags are single bits packed into
// 8-bit integers, least-significant bit first. E.g., for the 32x32 blocks in
// a 128x128 superblock, the index of the "o" block is 10 (in raster order),
// so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are lookup data only and are never written, so they are declared
// const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
  255, 127, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
  255, 255, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
  255, 127, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
  255, 255, 255, 127, 127, 127, 127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127 
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129   // 4X4
130   has_tr_4x4,
131   // 4X8,       8X4,            8X8
132   has_tr_4x8, has_tr_8x4, has_tr_8x8,
133   // 8X16,      16X8,           16X16
134   has_tr_8x16, has_tr_16x8, has_tr_16x16,
135   // 16X32,     32X16,          32X32
136   has_tr_16x32, has_tr_32x16, has_tr_32x32,
137   // 32X64,     64X32,          64X64
138   has_tr_32x64, has_tr_64x32, has_tr_64x64,
139   // 64x128,    128x64,         128x128
140   has_tr_64x128, has_tr_128x64, has_tr_128x128,
141   // 4x16,      16x4,            8x32
142   has_tr_4x16, has_tr_16x4, has_tr_8x32,
143   // 32x8,      16x64,           64x16
144   has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146 
// Top-right availability bit tables for square blocks that are coded in the
// PARTITION_VERT_A / PARTITION_VERT_B visiting order. Same bit packing as the
// has_tr_* tables. Read-only lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156 
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167   // 4X4
168   NULL,
169   // 4X8,      8X4,         8X8
170   has_tr_4x8, NULL, has_tr_vert_8x8,
171   // 8X16,     16X8,        16X16
172   has_tr_8x16, NULL, has_tr_vert_16x16,
173   // 16X32,    32X16,       32X32
174   has_tr_16x32, NULL, has_tr_vert_32x32,
175   // 32X64,    64X32,       64X64
176   has_tr_32x64, NULL, has_tr_vert_64x64,
177   // 64x128,   128x64,      128x128
178   has_tr_64x128, NULL, has_tr_128x128
179 };
180 
// Returns the packed top-right availability table to use for `bsize` under
// the given partition type.
//
// The mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are
// looked up in has_tr_vert_tables, which holds only BLOCK_SIZES entries and is
// NULL for sizes that have no table; every other partition type is looked up
// in has_tr_tables. The result is asserted to be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const uint8_t *ret = NULL;
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
    // Mixed vertical partition: look up bsize in has_tr_vert_tables.
    assert(bsize < BLOCK_SIZES);
    ret = has_tr_vert_tables[bsize];
  } else {
    // Guard the index in debug builds, mirroring the check on the vert path.
    assert(bsize < BLOCK_SIZES_ALL);
    ret = has_tr_tables[bsize];
  }
  assert(ret);
  return ret;
}
194 
// Reports (1/0) whether the pixels to the top-right of a transform block can
// be used as intra reference.
//
// sb_size / bsize      superblock size and size of the coding block.
// mi_row / mi_col      position of the coding block, in mode-info units.
// top_available /
// right_available      caller-supplied availability of the above and right
//                      neighbours; if either is zero the answer is 0.
// partition            partition type, selects the availability table.
// txsz                 transform size of the block being predicted.
// row_off / col_off    offset of the transform block inside the coding block,
//                      compared here against mode-info-unit sizes scaled by
//                      ss_y / ss_x.
// ss_x / ss_y          right-shifts applied to horizontal / vertical unit
//                      counts for the current plane.
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int tx_w_unit = tx_size_wide_unit[txsz];
  const int plane_w_unit = AOMMAX(mi_size_wide[bsize] >> ss_x, 1);
  // Column (in units) where the top-right region of this transform starts.
  const int tr_start_col = col_off + tx_w_unit;

  if (row_off > 0) {
    // Not in the first transform row: only the room left on the right matters.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tr_start_col < plane_w_unit;
    }

    // Blocks wider than 64 are handled per 64-wide half.
    const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: the transform unit whose top-right corner sits at the
    // centre of a 128x128 block does have pixels available there.
    if (row_off == h64_unit && tr_start_col == w64_unit) return 1;

    return (col_off % w64_unit) + tx_w_unit < w64_unit;
  }

  // First transform row. If the top-right region still lies above this coding
  // block, it belongs to the block above, which is already available.
  if (tr_start_col < plane_w_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Top row of the superblock: the pixels come from the top and/or top-right
  // superblocks, both already available.
  if (sb_blk_row == 0) return 1;

  // Rightmost column of the superblock (but not its top row): the pixels
  // would come from the right superblock, which is not available yet.
  if (((sb_blk_col + 1) << bw_log2) >= sb_mi) return 0;

  // Everything else: consult the packed table to see whether the top-right
  // block is coded before this one.
  const int blk_idx =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[blk_idx / 8] >> (blk_idx % 8)) & 1;
}
250 
// Similar to the has_tr_* tables, but these record whether the bottom-left
// reference pixels are available. One table per block size, one bit per
// block, packed least-significant bit first into 8-bit entries.
// The tables are lookup data only and are never written, so they are declared
// const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312 
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314   // 4X4
315   has_bl_4x4,
316   // 4X8,         8X4,         8X8
317   has_bl_4x8, has_bl_8x4, has_bl_8x8,
318   // 8X16,        16X8,        16X16
319   has_bl_8x16, has_bl_16x8, has_bl_16x16,
320   // 16X32,       32X16,       32X32
321   has_bl_16x32, has_bl_32x16, has_bl_32x32,
322   // 32X64,       64X32,       64X64
323   has_bl_32x64, has_bl_64x32, has_bl_64x64,
324   // 64x128,      128x64,      128x128
325   has_bl_64x128, has_bl_128x64, has_bl_128x128,
326   // 4x16,        16x4,        8x32
327   has_bl_4x16, has_bl_16x4, has_bl_8x32,
328   // 32x8,        16x64,       64x16
329   has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331 
// Bottom-left availability bit tables for square blocks that are coded in the
// PARTITION_VERT_A / PARTITION_VERT_B visiting order. Same bit packing as the
// has_bl_* tables. Read-only lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341 
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352   // 4X4
353   NULL,
354   // 4X8,     8X4,         8X8
355   has_bl_4x8, NULL, has_bl_vert_8x8,
356   // 8X16,    16X8,        16X16
357   has_bl_8x16, NULL, has_bl_vert_16x16,
358   // 16X32,   32X16,       32X32
359   has_bl_16x32, NULL, has_bl_vert_32x32,
360   // 32X64,   64X32,       64X64
361   has_bl_32x64, NULL, has_bl_vert_64x64,
362   // 64x128,  128x64,      128x128
363   has_bl_64x128, NULL, has_bl_128x128
364 };
365 
// Returns the packed bottom-left availability table to use for `bsize` under
// the given partition type.
//
// The mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are
// looked up in has_bl_vert_tables, which holds only BLOCK_SIZES entries and is
// NULL for sizes that have no table; every other partition type is looked up
// in has_bl_tables. The result is asserted to be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const uint8_t *ret = NULL;
  if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
    // Mixed vertical partition: look up bsize in has_bl_vert_tables.
    assert(bsize < BLOCK_SIZES);
    ret = has_bl_vert_tables[bsize];
  } else {
    // Guard the index in debug builds, mirroring the check on the vert path.
    assert(bsize < BLOCK_SIZES_ALL);
    ret = has_bl_tables[bsize];
  }
  assert(ret);
  return ret;
}
379 
// Reports (1/0) whether the pixels to the bottom-left of a transform block
// can be used as intra reference.
//
// sb_size / bsize      superblock size and size of the coding block.
// mi_row / mi_col      position of the coding block, in mode-info units.
// bottom_available /
// left_available       caller-supplied availability of the bottom and left
//                      neighbours; if either is zero the answer is 0.
// partition            partition type, selects the availability table.
// txsz                 transform size of the block being predicted.
// row_off / col_off    offset of the transform block inside the coding block,
//                      compared here against mode-info-unit sizes scaled by
//                      ss_y / ss_x.
// ss_x / ss_y          right-shifts applied to horizontal / vertical unit
//                      counts for the current plane.
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  const int tx_h_unit = tx_size_high_unit[txsz];

  if (col_off > 0) {
    // Blocks wider than 64 are coded as 64-wide halves in raster order, so a
    // transform at the left edge of the right half can still see bottom-left
    // pixels that belong to the (already coded) left half.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_unit == 0) {
        const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;
        const int usable_h_unit =
            AOMMIN(mi_size_high[bsize] >> ss_y, h64_unit);
        // Available only when the whole bottom-left region stays inside the
        // left 64-wide half.
        return (row_off % h64_unit) + tx_h_unit < usable_h_unit;
      }
    }
    // Otherwise the pixels belong to the bottom-left block, which is not
    // available.
    return 0;
  }

  // First transform column. If the bottom-left region is still beside this
  // coding block, it belongs to the left block, which is already available.
  const int plane_h_unit = AOMMAX(mi_size_high[bsize] >> ss_y, 1);
  if (row_off + tx_h_unit < plane_h_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblocks, but only the left one is available, so require
  // the whole region to fall inside it.
  if (sb_blk_col == 0) {
    const int blk_top_unit = (sb_blk_row << bh_log2) >> ss_y;
    const int sb_h_unit = sb_mi >> ss_y;
    return blk_top_unit + row_off + tx_h_unit < sb_h_unit;
  }

  // Bottom row of the superblock (but not its leftmost column): the pixels
  // would come from the bottom superblock, which is not available yet.
  if (((sb_blk_row + 1) << bh_log2) >= sb_mi) return 0;

  // Everything else: consult the packed table to see whether the bottom-left
  // block is coded before this one.
  const int blk_idx =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[blk_idx / 8] >> (blk_idx % 8)) & 1;
}
448 
449 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
450                               const uint8_t *above, const uint8_t *left);
451 
452 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
453 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
454 
455 #if CONFIG_AV1_HIGHBITDEPTH
456 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457                                    const uint16_t *above, const uint16_t *left,
458                                    int bd);
459 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461 #endif
462 
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465 
466 #define INIT_RECTANGULAR(p, type)             \
467   p[TX_4X8] = aom_##type##_predictor_4x8;     \
468   p[TX_8X4] = aom_##type##_predictor_8x4;     \
469   p[TX_8X16] = aom_##type##_predictor_8x16;   \
470   p[TX_16X8] = aom_##type##_predictor_16x8;   \
471   p[TX_16X32] = aom_##type##_predictor_16x32; \
472   p[TX_32X16] = aom_##type##_predictor_32x16; \
473   p[TX_32X64] = aom_##type##_predictor_32x64; \
474   p[TX_64X32] = aom_##type##_predictor_64x32; \
475   p[TX_4X16] = aom_##type##_predictor_4x16;   \
476   p[TX_16X4] = aom_##type##_predictor_16x4;   \
477   p[TX_8X32] = aom_##type##_predictor_8x32;   \
478   p[TX_32X8] = aom_##type##_predictor_32x8;   \
479   p[TX_16X64] = aom_##type##_predictor_16x64; \
480   p[TX_64X16] = aom_##type##_predictor_64x16;
481 
482 #define INIT_NO_4X4(p, type)                  \
483   p[TX_8X8] = aom_##type##_predictor_8x8;     \
484   p[TX_16X16] = aom_##type##_predictor_16x16; \
485   p[TX_32X32] = aom_##type##_predictor_32x32; \
486   p[TX_64X64] = aom_##type##_predictor_64x64; \
487   INIT_RECTANGULAR(p, type)
488 
489 #define INIT_ALL_SIZES(p, type)           \
490   p[TX_4X4] = aom_##type##_predictor_4x4; \
491   INIT_NO_4X4(p, type)
492 
493   INIT_ALL_SIZES(pred[V_PRED], v)
494   INIT_ALL_SIZES(pred[H_PRED], h)
495   INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
496   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
497   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
498   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
499   INIT_ALL_SIZES(dc_pred[0][0], dc_128)
500   INIT_ALL_SIZES(dc_pred[0][1], dc_top)
501   INIT_ALL_SIZES(dc_pred[1][0], dc_left)
502   INIT_ALL_SIZES(dc_pred[1][1], dc)
503 #if CONFIG_AV1_HIGHBITDEPTH
504   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
505   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
506   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
507   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
508   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
509   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
510   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
511   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
512   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
513   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
514 #endif
515 #undef intra_pred_allsizes
516 }
517 
// Directional prediction, zone 1: 0 < angle < 90.
//
// Every output row is a 2-tap linear interpolation of the `above` reference
// row, sampled at a position that advances by `dx` (1/64 sample units) per
// row. Reference indices are limited to
// max = (bw + bh - 1) << upsample_above; once a position reaches that limit
// the output is simply above[max].
//
// dst, stride      output block and its row pitch.
// bw, bh           block width and height in pixels.
// above            reference row; indices 0..max may be read.
// left             unused.
// upsample_above   shift applied to reference indices (the reference row is
//                  2x denser when it is 1).
// dx               per-row step, must be > 0.
// dy               unused, asserted to be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint8_t *row_ptr = dst;
  int pos = dx;
  int row = 0;

  // Rows that still start inside the reference row.
  while (row < bh) {
    const int first_idx = pos >> pos_shift;
    if (first_idx >= last_idx) break;

    // Interpolation weight of the right-hand sample, in 1/32 units.
    const int weight = ((pos << upsample_above) & 0x3F) >> 1;
    int idx = first_idx;
    for (int col = 0; col < bw; ++col) {
      if (idx < last_idx) {
        const int acc = above[idx] * (32 - weight) + above[idx + 1] * weight;
        row_ptr[col] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        row_ptr[col] = above[last_idx];
      }
      idx += idx_step;
    }

    ++row;
    row_ptr += stride;
    pos += dx;
  }

  // Every remaining row lies entirely past the end of the reference row.
  for (; row < bh; ++row, row_ptr += stride) {
    memset(row_ptr, above[last_idx], bw * sizeof(row_ptr[0]));
  }
}
555 
// Directional prediction, zone 2: 90 < angle < 180.
//
// Each pixel is projected onto the `above` row; if the projection falls left
// of index -(1 << upsample_above) it is projected onto the `left` column
// instead. Either way the result is a 2-tap linear interpolation of the two
// reference samples around the projected position.
//
// dst, stride      output block and its row pitch.
// bw, bh           block width and height in pixels.
// above, left      reference row / column; negative indices down to
//                  -(1 << upsample) are read, so both must be addressable
//                  there.
// upsample_above,
// upsample_left    shift applied to the respective reference indices.
// dx, dy           projection steps in 1/64 sample units, both must be > 0.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement of this row's projection onto `above`.
    const int row_shift_x = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const uint8_t *ref;  // First of the two samples to blend.
      int shift;           // Weight of the second sample, in 1/32 units.

      const int x = (c << 6) - row_shift_x;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        ref = above + base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left + base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int acc = ref[0] * (32 - shift) + ref[1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
594 
// Directional prediction, zone 3: 180 < angle < 270.
//
// Every output column is a 2-tap linear interpolation of the `left` reference
// column, sampled at a position that advances by `dy` (1/64 sample units) per
// column. Reference indices are limited to
// max = (bw + bh - 1) << upsample_left; from the first row whose index
// reaches that limit, the rest of the column is filled with left[max].
//
// dst, stride      output block and its row pitch.
// bw, bh           block width and height in pixels.
// above            unused.
// left             reference column; indices 0..max may be read.
// upsample_left    shift applied to reference indices.
// dx               unused, asserted to be 1.
// dy               per-column step, must be > 0.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int last_idx = (bw + bh - 1) << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;

  int pos = dy;
  for (int col = 0; col < bw; ++col, pos += dy) {
    // Interpolation weight of the lower sample, in 1/32 units.
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> pos_shift;
    int row = 0;

    // Rows whose reference index is still inside the column.
    for (; row < bh && idx < last_idx; ++row, idx += idx_step) {
      const int acc = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(acc, 5);
    }

    // Whatever is left of this column repeats the last reference sample.
    for (; row < bh; ++row) dst[row * stride + col] = left[last_idx];
  }
}
626 
// Dispatches an 8-bit directional prediction for `angle` (in degrees, expected
// to satisfy 0 < angle < 270):
//   exactly 90 / 180   -> the plain V_PRED / H_PRED predictor,
//   (0, 90)            -> zone 1, which reads only `above`,
//   (90, 180)          -> zone 2, which reads both edges,
//   (180, 270)         -> zone 3, which reads only `left`.
// Any other angle leaves dst untouched (and trips the assert in debug builds).
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
  } else if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
  } else if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
651 
652 #if CONFIG_AV1_HIGHBITDEPTH
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
//
// Every output row is a 2-tap linear interpolation of the `above` reference
// row, sampled at a position that advances by `dx` (1/64 sample units) per
// row. Reference indices are limited to
// max = (bw + bh - 1) << upsample_above; once a position reaches that limit
// the output is simply above[max].
//
// dst, stride      output block and its row pitch (in samples).
// bw, bh           block width and height in pixels.
// above            reference row; indices 0..max may be read.
// left, bd         unused.
// upsample_above   shift applied to reference indices.
// dx               per-row step, must be > 0.
// dy               unused, asserted to be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int last_idx = ((bw + bh) - 1) << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int idx_step = 1 << upsample_above;

  uint16_t *row_ptr = dst;
  int pos = dx;
  int row = 0;

  // Rows that still start inside the reference row.
  while (row < bh) {
    const int first_idx = pos >> pos_shift;
    if (first_idx >= last_idx) break;

    // Interpolation weight of the right-hand sample, in 1/32 units.
    const int weight = ((pos << upsample_above) & 0x3F) >> 1;
    int idx = first_idx;
    for (int col = 0; col < bw; ++col) {
      if (idx < last_idx) {
        const int acc = above[idx] * (32 - weight) + above[idx + 1] * weight;
        row_ptr[col] = ROUND_POWER_OF_TWO(acc, 5);
      } else {
        row_ptr[col] = above[last_idx];
      }
      idx += idx_step;
    }

    ++row;
    row_ptr += stride;
    pos += dx;
  }

  // Every remaining row lies entirely past the end of the reference row.
  for (; row < bh; ++row, row_ptr += stride) {
    aom_memset16(row_ptr, above[last_idx], bw);
  }
}
692 
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
//
// Each pixel is projected onto the `above` row; if the projection falls left
// of index -(1 << upsample_above) it is projected onto the `left` column
// instead. Either way the result is a 2-tap linear interpolation of the two
// reference samples around the projected position.
//
// dst, stride      output block and its row pitch (in samples).
// bw, bh           block width and height in pixels.
// above, left      reference row / column; negative indices down to
//                  -(1 << upsample) are read, so both must be addressable
//                  there.
// upsample_above,
// upsample_left    shift applied to the respective reference indices.
// dx, dy           projection steps in 1/64 sample units, both must be > 0.
// bd               unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    // Horizontal displacement of this row's projection onto `above`.
    const int row_shift_x = (r + 1) * dx;

    for (int c = 0; c < bw; ++c) {
      const uint16_t *ref;  // First of the two samples to blend.
      int shift;            // Weight of the second sample, in 1/32 units.

      const int x = (c << 6) - row_shift_x;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        ref = above + base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left + base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int acc = ref[0] * (32 - shift) + ref[1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(acc, 5);
    }
  }
}
732 
// Directional prediction, zone 3: 180 < angle < 270
//
// Only the left edge is read. Column c uses the position (c + 1) * dy along
// that edge (6 - upsample_left fractional bits); its integer part is the
// starting index and its fraction gives one blend weight shared by the whole
// column. The index advances by 1 << upsample_left per output row. As soon as
// the index reaches (bw + bh - 1) << upsample_left, the remaining rows of the
// column are filled with the edge sample at that index. `above`, `dx` and
// `bd` are unused (dx is asserted to be 1).
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int frac_bits = 6 - upsample_left;
  const int idx_step = 1 << upsample_left;
  const int idx_limit = (bw + bh - 1) << upsample_left;

  for (int col = 0; col < bw; ++col) {
    const int pos = (col + 1) * dy;
    const int weight = ((pos << upsample_left) & 0x3F) >> 1;
    int idx = pos >> frac_bits;
    int row = 0;

    // Interpolated part of the column.
    while (row < bh && idx < idx_limit) {
      const int blend = left[idx] * (32 - weight) + left[idx + 1] * weight;
      dst[row * stride + col] = ROUND_POWER_OF_TWO(blend, 5);
      ++row;
      idx += idx_step;
    }

    // Past the end of the edge: replicate the last usable sample.
    while (row < bh) {
      dst[row * stride + col] = left[idx_limit];
      ++row;
    }
  }
}
765 
// Runs the high bit-depth directional predictor that matches `angle`
// (asserted to lie strictly between 0 and 270):
//   0 < angle < 90    -> zone 1
//   90 < angle < 180  -> zone 2
//   180 < angle < 270 -> zone 3
//   angle == 90 / 180 -> the V_PRED / H_PRED entries of pred_high
// Any other angle writes nothing.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  int zone = 0;
  if (angle > 0 && angle < 90) {
    zone = 1;
  } else if (angle > 90 && angle < 180) {
    zone = 2;
  } else if (angle > 180 && angle < 270) {
    zone = 3;
  }

  switch (zone) {
    case 1:
      av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                  upsample_above, dx, dy, bd);
      break;
    case 2:
      av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                  upsample_above, upsample_left, dx, dy, bd);
      break;
    case 3:
      av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left,
                                  upsample_left, dx, dy, bd);
      break;
    default:
      // Exactly vertical / horizontal: use the non-directional predictors.
      if (angle == 90) {
        pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
      } else if (angle == 180) {
        pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
      }
      break;
  }
}
791 #endif  // CONFIG_AV1_HIGHBITDEPTH
792 
// Filter taps for the filter-intra predictors, indexed as
// av1_filter_intra_taps[mode][k][tap].
//
// As used by av1_filter_intra_predictor_c() in this file:
//  - k (0..7) selects one output pixel of a 4x2 patch: row k >> 2,
//    column k & 3.
//  - taps 0..6 weight the seven neighbouring samples p0..p6 (p0..p4 are the
//    five samples in the row above the patch, starting one column to its
//    left; p5 and p6 are the two samples in the column to its left).
//  - tap 7 is not read by that predictor and is 0 in every row.
// The taps of every row below sum to 16.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
846 
// Filter-intra prediction for 8-bit samples.
//
// The block (at most 32x32, asserted) is built in 4x2 patches visited in
// raster order. Each patch is computed from seven neighbouring samples: the
// five samples in the row directly above the patch (starting one column to
// its left) and the two samples in the column directly to its left. Patches
// that are not on the top row / left column therefore take previously
// predicted samples as neighbours. The weights come from
// av1_filter_intra_taps[mode].
//
// `above` must be readable from index -1 to bw - 1 and `left` from 0 to
// bh - 1.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  // Working area with one extra row and column for the edge samples:
  // row 0 holds above[-1 .. bw - 1], column 0 of rows 1..bh holds left[].
  uint8_t buffer[33][33];

  assert(bw <= 32 && bh <= 32);

  memcpy(buffer[0], above - 1, (bw + 1) * sizeof(buffer[0][0]));
  for (int y = 0; y < bh; ++y) buffer[y + 1][0] = left[y];

  for (int y0 = 1; y0 <= bh; y0 += 2) {
    for (int x0 = 1; x0 <= bw; x0 += 4) {
      // Snapshot the seven neighbours before the patch is written.
      const uint8_t nb[7] = {
        buffer[y0 - 1][x0 - 1], buffer[y0 - 1][x0],     buffer[y0 - 1][x0 + 1],
        buffer[y0 - 1][x0 + 2], buffer[y0 - 1][x0 + 3], buffer[y0][x0 - 1],
        buffer[y0 + 1][x0 - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int pr = 0;
        for (int t = 0; t < 7; ++t) pr += taps[t] * nb[t];
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[y0 + (k >> 2)][x0 + (k & 0x03)] =
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  // Copy the predicted samples (without the edge row/column) to dst.
  for (int y = 0; y < bh; ++y, dst += stride) {
    memcpy(dst, &buffer[y + 1][1], bw * sizeof(dst[0]));
  }
}
893 
894 #if CONFIG_AV1_HIGHBITDEPTH
// Filter-intra prediction for 16-bit sample buffers.
//
// Works like the 8-bit predictor: the block (at most 32x32, asserted) is
// produced in 4x2 patches in raster order, each patch being a weighted sum
// of the five samples above it (starting one column to its left) and the two
// samples to its left, using av1_filter_intra_taps[mode]. Results are clipped
// with clip_pixel_highbd() for bit depth `bd`.
//
// `above` must be readable from index -1 to bw - 1 and `left` from 0 to
// bh - 1.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  // Row 0 holds above[-1 .. bw - 1]; column 0 of rows 1..bh holds left[].
  uint16_t buffer[33][33];

  assert(bw <= 32 && bh <= 32);

  memcpy(buffer[0], above - 1, (bw + 1) * sizeof(buffer[0][0]));
  for (int y = 0; y < bh; ++y) buffer[y + 1][0] = left[y];

  for (int y0 = 1; y0 <= bh; y0 += 2) {
    for (int x0 = 1; x0 <= bw; x0 += 4) {
      const uint16_t *const row_above = &buffer[y0 - 1][x0 - 1];
      // Snapshot the seven neighbours before the patch is written.
      const uint16_t nb[7] = {
        row_above[0],        row_above[1],
        row_above[2],        row_above[3],
        row_above[4],        buffer[y0][x0 - 1],
        buffer[y0 + 1][x0 - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int pr = 0;
        for (int t = 0; t < 7; ++t) pr += taps[t] * nb[t];
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[y0 + (k >> 2)][x0 + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  // Copy the predicted samples (without the edge row/column) to dst.
  for (int y = 0; y < bh; ++y, dst += stride) {
    memcpy(dst, &buffer[y + 1][1], bw * sizeof(dst[0]));
  }
}
943 #endif  // CONFIG_AV1_HIGHBITDEPTH
944 
is_smooth(const MB_MODE_INFO * mbmi,int plane)945 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
946   if (plane == 0) {
947     const PREDICTION_MODE mode = mbmi->mode;
948     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
949             mode == SMOOTH_H_PRED);
950   } else {
951     // uv_mode is not set for inter blocks, so need to explicitly
952     // detect that case.
953     if (is_inter_block(mbmi)) return 0;
954 
955     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
956     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
957             uv_mode == UV_SMOOTH_H_PRED);
958   }
959 }
960 
get_intra_edge_filter_type(const MACROBLOCKD * xd,int plane)961 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
962   int ab_sm, le_sm;
963 
964   if (plane == 0) {
965     const MB_MODE_INFO *ab = xd->above_mbmi;
966     const MB_MODE_INFO *le = xd->left_mbmi;
967     ab_sm = ab ? is_smooth(ab, plane) : 0;
968     le_sm = le ? is_smooth(le, plane) : 0;
969   } else {
970     const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
971     const MB_MODE_INFO *le = xd->chroma_left_mbmi;
972     ab_sm = ab ? is_smooth(ab, plane) : 0;
973     le_sm = le ? is_smooth(le, plane) : 0;
974   }
975 
976   return (ab_sm || le_sm) ? 1 : 0;
977 }
978 
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0 + bs1 is the combined block size used to pick a threshold row, `delta`
// is a signed angle offset of which only the magnitude is used, and `type`
// picks the threshold set: 0 for the first set, any other value for the
// second.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int size = bs0 + bs1;
  const int mag = abs(delta);

  if (type != 0) {
    if (size <= 8) return (mag >= 64) ? 2 : (mag >= 40) ? 1 : 0;
    if (size <= 16) return (mag >= 48) ? 2 : (mag >= 20) ? 1 : 0;
    if (size <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size <= 8) return (mag >= 56) ? 1 : 0;
  // Sizes up to 12 and up to 16 share the same threshold.
  if (size <= 16) return (mag >= 40) ? 1 : 0;
  if (size <= 24) {
    return (mag >= 32) ? 3 : (mag >= 16) ? 2 : (mag >= 8) ? 1 : 0;
  }
  if (size <= 32) {
    return (mag >= 32) ? 3 : (mag >= 4) ? 2 : (mag >= 1) ? 1 : 0;
  }
  return (mag >= 1) ? 3 : 0;
}
1017 
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1018 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1019   if (!strength) return;
1020 
1021   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1022                                                          { 0, 5, 6, 5, 0 },
1023                                                          { 2, 4, 4, 4, 2 } };
1024   const int filt = strength - 1;
1025   uint8_t edge[129];
1026 
1027   memcpy(edge, p, sz * sizeof(*p));
1028   for (int i = 1; i < sz; i++) {
1029     int s = 0;
1030     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1031       int k = i - 2 + j;
1032       k = (k < 0) ? 0 : k;
1033       k = (k > sz - 1) ? sz - 1 : k;
1034       s += edge[k] * kernel[filt][j];
1035     }
1036     s = (s + 8) >> 4;
1037     p[i] = s;
1038   }
1039 }
1040 
// Filters the top-left corner sample shared by the two 8-bit edges.
//
// The new corner is (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8)
// >> 4 and is stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int corner = p_above[-1];
  const int neighbours = p_left[0] + p_above[0];
  const int filtered = (5 * neighbours + 6 * corner + 8) >> 4;

  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1050 
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1051 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1052   if (!strength) return;
1053 
1054   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1055                                                          { 0, 5, 6, 5, 0 },
1056                                                          { 2, 4, 4, 4, 2 } };
1057   const int filt = strength - 1;
1058   uint16_t edge[129];
1059 
1060   memcpy(edge, p, sz * sizeof(*p));
1061   for (int i = 1; i < sz; i++) {
1062     int s = 0;
1063     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1064       int k = i - 2 + j;
1065       k = (k < 0) ? 0 : k;
1066       k = (k > sz - 1) ? sz - 1 : k;
1067       s += edge[k] * kernel[filt][j];
1068     }
1069     s = (s + 8) >> 4;
1070     p[i] = s;
1071   }
1072 }
1073 
1074 #if CONFIG_AV1_HIGHBITDEPTH
// Filters the top-left corner sample shared by the two 16-bit edges.
//
// The new corner is (5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0] + 8)
// >> 4 and is stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int corner = p_above[-1];
  const int neighbours = p_left[0] + p_above[0];
  const int filtered = (5 * neighbours + 6 * corner + 8) >> 4;

  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1084 #endif
1085 
av1_upsample_intra_edge_c(uint8_t * p,int sz)1086 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1087   // interpolate half-sample positions
1088   assert(sz <= MAX_UPSAMPLE_SZ);
1089 
1090   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1091   // copy p[-1..(sz-1)] and extend first and last samples
1092   in[0] = p[-1];
1093   in[1] = p[-1];
1094   for (int i = 0; i < sz; i++) {
1095     in[i + 2] = p[i];
1096   }
1097   in[sz + 2] = p[sz - 1];
1098 
1099   // interpolate half-sample edge positions
1100   p[-2] = in[0];
1101   for (int i = 0; i < sz; i++) {
1102     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1103     s = clip_pixel((s + 8) >> 4);
1104     p[2 * i - 1] = s;
1105     p[2 * i] = in[i + 2];
1106   }
1107 }
1108 
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1109 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1110   // interpolate half-sample positions
1111   assert(sz <= MAX_UPSAMPLE_SZ);
1112 
1113   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1114   // copy p[-1..(sz-1)] and extend first and last samples
1115   in[0] = p[-1];
1116   in[1] = p[-1];
1117   for (int i = 0; i < sz; i++) {
1118     in[i + 2] = p[i];
1119   }
1120   in[sz + 2] = p[sz - 1];
1121 
1122   // interpolate half-sample edge positions
1123   p[-2] = in[0];
1124   for (int i = 0; i < sz; i++) {
1125     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1126     s = (s + 8) >> 4;
1127     s = clip_pixel_highbd(s, bd);
1128     p[2 * i - 1] = s;
1129     p[2 * i] = in[i + 2];
1130   }
1131 }
1132 #if CONFIG_AV1_HIGHBITDEPTH
// Builds the intra prediction of one transform block for 16-bit sample
// buffers.
//
// ref8 / dst8 are converted with CONVERT_TO_SHORTPTR() before use. `ref`
// points at the top-left sample of the block in the reference frame, so the
// above edge is read from the row at ref - ref_stride and the left edge from
// the column at ref - 1.
//
// n_top_px / n_left_px are the numbers of available above / left pixels
// (asserted >= 0). n_topright_px / n_bottomleft_px are the numbers of
// available pixels in the extended parts of those edges (asserted >= -1); a
// negative value means the extension is not prepared at all.
//
// Steps:
//  1. Fill the local edge buffers with default values.
//  2. Decide which edges the mode needs.
//  3. If the needed edge is not available, fill the block with one value and
//     return.
//  4. Gather the left edge, above edge and top-left corner, padding missing
//     pixels by replication.
//  5. Run the filter-intra, directional (with optional edge filtering and
//     upsampling), DC or other predictor.
static void build_intra_predictors_high(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
    int bit_depth) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // The edges start 16 entries into their buffers so that negative indices
  // (the corner at [-1], and [-2] written by edge upsampling) stay inside the
  // arrays.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey value for this bit depth; the defaults below are derived from it.
  int base = 128 << (bit_depth - 8);
  // The left_data, above_data buffers are fully initialized here (to base + 1
  // and base - 1 respectively) to fix some intermittent valgrind errors.
  // Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
  // seen to be the potential reason for this issue.
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // Directional modes derive the needed edges from the actual angle instead
  // of the per-mode table: up to 90 degrees only the above edge, between 90
  // and 180 both edges, from 180 on only the left edge. The corner is always
  // prepared.
  if (is_dr_mode) {
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always uses both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Flat-fill shortcut: the above edge is not needed and there are no left
  // pixels, or the left edge is not needed and there are no top pixels. The
  // fill value is the first pixel of the other edge when that edge has
  // pixels, otherwise the default for the missing edge.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  // Copy the available left (and bottom-left) pixels, then replicate the last
  // copied pixel down to the number of pixels needed. With no left pixels but
  // some top pixels, the whole left edge is set to the first above pixel.
  if (need_left) {
    const int num_left_pixels_needed =
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  // Same scheme for the above edge: copy the available top (and top-right)
  // pixels, replicate the last one, or fall back to the first left pixel when
  // no top pixels exist.
  if (need_above) {
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Top-left corner: the real corner pixel when both edges exist, otherwise
  // the first pixel of whichever edge exists, otherwise `base`. Both edge
  // buffers get the same value at index -1.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, bit_depth);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // 1 when the corner sample is part of the span that gets filtered.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the resulting flags are passed
      // on to the directional predictor.
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, bit_depth);
    return;
  }

  // predict
  // DC prediction picks its variant by which edges have pixels; every other
  // mode goes through the per-mode table.
  if (mode == DC_PRED) {
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, bit_depth);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
  }
}
1307 #endif  // CONFIG_AV1_HIGHBITDEPTH
1308 
// Builds the intra prediction of one transform block for 8-bit sample
// buffers.
//
// `ref` points at the top-left sample of the block in the reference frame, so
// the above edge is read from the row at ref - ref_stride and the left edge
// from the column at ref - 1.
//
// n_top_px / n_left_px are the numbers of available above / left pixels
// (asserted >= 0). n_topright_px / n_bottomleft_px are the numbers of
// available pixels in the extended parts of those edges (asserted >= -1); a
// negative value means the extension is not prepared at all.
//
// Steps:
//  1. Fill the local edge buffers with default values.
//  2. Decide which edges the mode needs.
//  3. If the needed edge is not available, fill the block with one value and
//     return.
//  4. Gather the left edge, above edge and top-left corner, padding missing
//     pixels by replication.
//  5. Run the filter-intra, directional (with optional edge filtering and
//     upsampling), DC or other predictor.
static void build_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // The edges start 16 entries into their buffers so that negative indices
  // (the corner at [-1], and [-2] written by edge upsampling) stay inside the
  // arrays.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // The left_data, above_data buffers are fully initialized here (to 129 and
  // 127 respectively) to fix some intermittent valgrind errors.
  // Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
  // be the potential reason for this issue.
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // Directional modes derive the needed edges from the actual angle instead
  // of the per-mode table: up to 90 degrees only the above edge, between 90
  // and 180 both edges, from 180 on only the left edge. The corner is always
  // prepared.
  if (is_dr_mode) {
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always uses both edges and the corner.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Flat-fill shortcut: the above edge is not needed and there are no left
  // pixels, or the left edge is not needed and there are no top pixels. The
  // fill value is the first pixel of the other edge when that edge has
  // pixels, otherwise the default for the missing edge.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  // Copy the available left (and bottom-left) pixels, then replicate the last
  // copied pixel down to the number of pixels needed. With no left pixels but
  // some top pixels, the whole left edge is set to the first above pixel.
  if (need_left) {
    const int num_left_pixels_needed =
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      memset(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  // Same scheme for the above edge: copy the available top (and top-right)
  // pixels, replicate the last one, or fall back to the first left pixel when
  // no top pixels exist.
  if (need_above) {
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      memset(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Top-left corner: the real corner pixel when both edges exist, otherwise
  // the first pixel of whichever edge exists, otherwise 128. Both edge
  // buffers get the same value at index -1.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        // 1 when the corner sample is part of the span that gets filtered.
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      // Optional 2x upsampling of each edge; the resulting flags are passed
      // on to the directional predictor.
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  // DC prediction picks its variant by which edges have pixels; every other
  // mode goes through the per-mode table.
  if (mode == DC_PRED) {
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1478 
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1479 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1480                                             int subsampling_y) {
1481   assert(subsampling_x >= 0 && subsampling_x < 2);
1482   assert(subsampling_y >= 0 && subsampling_y < 2);
1483   BLOCK_SIZE bs = bsize;
1484   switch (bsize) {
1485     case BLOCK_4X4:
1486       if (subsampling_x == 1 && subsampling_y == 1)
1487         bs = BLOCK_8X8;
1488       else if (subsampling_x == 1)
1489         bs = BLOCK_8X4;
1490       else if (subsampling_y == 1)
1491         bs = BLOCK_4X8;
1492       break;
1493     case BLOCK_4X8:
1494       if (subsampling_x == 1 && subsampling_y == 1)
1495         bs = BLOCK_8X8;
1496       else if (subsampling_x == 1)
1497         bs = BLOCK_8X8;
1498       else if (subsampling_y == 1)
1499         bs = BLOCK_4X8;
1500       break;
1501     case BLOCK_8X4:
1502       if (subsampling_x == 1 && subsampling_y == 1)
1503         bs = BLOCK_8X8;
1504       else if (subsampling_x == 1)
1505         bs = BLOCK_8X4;
1506       else if (subsampling_y == 1)
1507         bs = BLOCK_8X8;
1508       break;
1509     case BLOCK_4X16:
1510       if (subsampling_x == 1 && subsampling_y == 1)
1511         bs = BLOCK_8X16;
1512       else if (subsampling_x == 1)
1513         bs = BLOCK_8X16;
1514       else if (subsampling_y == 1)
1515         bs = BLOCK_4X16;
1516       break;
1517     case BLOCK_16X4:
1518       if (subsampling_x == 1 && subsampling_y == 1)
1519         bs = BLOCK_16X8;
1520       else if (subsampling_x == 1)
1521         bs = BLOCK_16X4;
1522       else if (subsampling_y == 1)
1523         bs = BLOCK_16X8;
1524       break;
1525     default: break;
1526   }
1527   return bs;
1528 }
1529 
// Produces the intra prediction for one transform block of `plane` in `dst`.
//
// Palette blocks are filled directly from the plane's color index map. For all
// other modes this function determines which neighboring edges are required
// (top, top-right, left, bottom-left), how many pixels of each are usable, and
// then delegates to build_intra_predictors() or, for high-bitdepth buffers,
// build_intra_predictors_high().
//
//   wpx, hpx           Plane block dimensions in pixels; wpx is also the row
//                      stride used to index the color index map.
//   col_off, row_off   Transform block offset inside the block; shifted left
//                      by MI_SIZE_LOG2 to obtain pixel offsets.
//   filter_intra_mode  FILTER_INTRA_MODES means filter-intra is not in use.
//   ref, dst           Reference and destination buffers with their strides.
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
                             int enable_intra_edge_filter, int wpx, int hpx,
                             TX_SIZE tx_size, PREDICTION_MODE mode,
                             int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode,
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
                             int dst_stride, int col_off, int row_off,
                             int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel position of this transform block inside the plane block.
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    // The color index map is stored per luma/chroma class, not per plane.
    const int map_plane = (plane != 0);
    const uint8_t *const color_map = xd->plane[map_plane].color_index_map +
                                     xd->color_index_map_offset[map_plane];
    const uint16_t *const colors =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const idx_row = &color_map[(row + y) * wpx + x];
        uint16_t *const out_row = &dst16[row * dst_stride];
        for (int col = 0; col < txwpx; ++col) {
          out_row[col] = colors[idx_row[col]];
        }
      }
    } else {
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const idx_row = &color_map[(row + y) * wpx + x];
        uint8_t *const out_row = &dst[row * dst_stride];
        for (int col = 0; col < txwpx; ++col) {
          out_row[col] = (uint8_t)colors[idx_row[col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  // Transform block dimensions in the same units as col_off / row_off.
  const int tx_w_units = tx_size_wide_unit[tx_size];
  const int tx_h_units = tx_size_high_unit[tx_size];

  // Interior transform blocks always have a neighbor inside the block itself;
  // otherwise fall back to the block-level availability flags.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);

  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance from the right edge of this prediction block to the frame's
  // right edge.
  const int px_to_right =
      (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
  // Distance from the bottom edge of this prediction block to the frame's
  // bottom edge.
  const int px_to_bottom =
      (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;

  const int right_available =
      mi_col + ((col_off + tx_w_units) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (px_to_bottom > 0) &&
      (mi_row + ((row_off + tx_h_units) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // Force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = (ss_x || ss_y)
                               ? scale_chroma_bsize(mbmi->bsize, ss_x, ss_y)
                               : mbmi->bsize;

  // Decide which extended edges the mode needs. Directional modes derive the
  // answer from the prediction angle; filter-intra never uses the extended
  // edges; everything else consults the per-mode table.
  int p_angle = 0;
  int need_top_right;
  int need_bottom_left;
  if (av1_is_directional_mode(mode)) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    need_top_right = p_angle < 90;
    need_bottom_left = p_angle > 180;
  } else if (filter_intra_mode != FILTER_INTRA_MODES) {
    need_top_right = 0;
    need_bottom_left = 0;
  } else {
    need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
    need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
  }

  // Possible states for have_top_right (TR) and have_bottom_left (BL):
  //  -1 : not needed
  //   0 : needed but not available
  // > 0 : needed and pixels are available
  int have_top_right = -1;
  if (need_top_right) {
    have_top_right = has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
                                   right_available, partition, tx_size,
                                   row_off, col_off, ss_x, ss_y);
  }
  int have_bottom_left = -1;
  if (need_bottom_left) {
    have_bottom_left = has_bottom_left(sb_size, bsize, mi_row, mi_col,
                                       bottom_available, have_left, partition,
                                       tx_size, row_off, col_off, ss_x, ss_y);
  }

  // Usable pixel counts per edge, clamped against the frame boundary. The
  // top-right / bottom-left counts carry the -1 / 0 state through unchanged.
  const int n_top_px = have_top ? AOMMIN(txwpx, px_to_right + txwpx) : 0;
  const int n_topright_px =
      have_top_right > 0 ? AOMMIN(txwpx, px_to_right) : have_top_right;
  const int n_left_px = have_left ? AOMMIN(txhpx, px_to_bottom + txhpx) : 0;
  const int n_bottomleft_px =
      have_bottom_left > 0 ? AOMMIN(txhpx, px_to_bottom) : have_bottom_left;

  const int disable_edge_filter = !enable_intra_edge_filter;
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(ref, ref_stride, dst, dst_stride, mode,
                                p_angle, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px,
                                intra_edge_filter_type, xd->bd);
    return;
  }
#endif
  build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, p_angle,
                         filter_intra_mode, tx_size, disable_edge_filter,
                         n_top_px, n_topright_px, n_left_px, n_bottomleft_px,
                         intra_edge_filter_type);
}
1652 
// Predicts one transform block of `plane` in place in the plane's destination
// buffer, taking every prediction parameter from the current block's mode info
// (xd->mi[0]) and from the sequence header in `cm`.
//
// blk_col / blk_row locate the transform block inside the destination buffer;
// they are scaled by MI_SIZE_LOG2 to obtain the pixel offset.
//
// Chroma planes whose uv_mode is UV_CFL_PRED take a separate path: the DC
// prediction is either computed (and optionally stored in the CfL cache) or
// loaded from that cache, after which cfl_predict_block() is called on the
// same buffer.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const SequenceHeader *seq_params = cm->seq_params;
  const int is_luma = (plane == AOM_PLANE_Y);

  const int dst_stride = pd->dst.stride;
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];

  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int angle_delta = mbmi->angle_delta[!is_luma] * ANGLE_STEP;
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;

  // Filter-intra applies to luma only; FILTER_INTRA_MODES marks "not used".
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  // Common case: anything that is not chroma-from-luma.
  if (is_luma || mbmi->uv_mode != UV_CFL_PRED) {
    av1_predict_intra_block(xd, seq_params->sb_size,
                            seq_params->enable_intra_edge_filter, pd->width,
                            pd->height, tx_size, mode, angle_delta,
                            use_palette, filter_intra_mode, dst, dst_stride,
                            dst, dst_stride, blk_col, blk_row, plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    // Outside lossless mode the transform block must cover the whole plane
    // block.
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    // Reuse the DC prediction stored by an earlier call.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(xd, seq_params->sb_size,
                            seq_params->enable_intra_edge_filter, pd->width,
                            pd->height, tx_size, mode, angle_delta,
                            use_palette, filter_intra_mode, dst, dst_stride,
                            dst, dst_stride, blk_col, blk_row, plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1707 
// Public entry point for setting up the intra predictors: hands
// init_intra_predictors_internal to aom_once().
// NOTE(review): presumably aom_once() guarantees the initializer runs only
// once no matter how often this is called -- confirm against
// aom_ports/aom_once.h.
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1711