1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "av1/common/av1_common_int.h"
23 #include "av1/common/cfl.h"
24 #include "av1/common/reconintra.h"
25
// Bit flags naming the neighbouring pixel regions an intra prediction mode
// may read. They are OR-ed together to form the entries of extend_modes[].
// Each flag occupies its own bit (bits 1 through 5); bit 0 is not used.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};
33
// Compile-time sizes for the intra edge handling code.
// NOTE(review): the code that consumes these macros lies outside this
// section; confirm their exact roles at the use sites.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Twice MAX_TX_SIZE plus 32 extra entries.
// NOTE(review): presumably the length of the above/left reference pixel
// buffers -- confirm against the buffer declarations.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
// For each intra prediction mode, the set of neighbouring regions (a mask of
// NEED_* flags) associated with that mode. One entry per mode; the trailing
// comment on each row names the mode the entry belongs to.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};
54
55 // Tables to store if the top-right reference pixels are available. The flags
56 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
57 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
58 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
59 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
60 // . . . .
61 // . . . .
62 // . . o .
63 // . . . .
// One bit-packed table per block size (suffix is WIDTHxHEIGHT). The tables
// are declared const because they are lookup data that is only ever read,
// always through the const pointers stored in has_tr_tables[] and
// has_tr_vert_tables[].
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
// Tables for the 1:4 and 4:1 block shapes.
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
// Top-right availability table for every block size. get_has_tr_table()
// indexes this array with bsize for every partition type other than
// PARTITION_VERT_A / PARTITION_VERT_B. The comment above each row lists the
// block sizes its entries correspond to, in order.
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_tr_4x4,
  // 4X8,       8X4,            8X8
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
  // 8X16,      16X8,           16X16
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
  // 16X32,     32X16,          32X32
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
  // 32X64,     64X32,          64X64
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
  // 64x128,    128x64,         128x128
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
  // 4x16,      16x4,            8x32
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
  // 32x8,      16x64,           64x16
  has_tr_32x8, has_tr_16x64, has_tr_64x16
};
146
// Top-right availability bitmasks for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B; selected through has_tr_vert_tables[].
// Declared const because they are read-only lookup data.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
// Indexed by bsize in get_has_tr_table() when the partition is
// PARTITION_VERT_A or PARTITION_VERT_B. Horizontal rectangles and 4X4 have no
// entry (NULL); get_has_tr_table() asserts that the selected entry is
// non-NULL.
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,      8X4,         8X8
  has_tr_4x8, NULL, has_tr_vert_8x8,
  // 8X16,     16X8,        16X16
  has_tr_8x16, NULL, has_tr_vert_16x16,
  // 16X32,    32X16,       32X32
  has_tr_16x32, NULL, has_tr_vert_32x32,
  // 32X64,    64X32,       64X64
  has_tr_32x64, NULL, has_tr_vert_64x64,
  // 64x128,   128x64,      128x128
  has_tr_64x128, NULL, has_tr_128x128
};
180
// Returns the top-right availability bit table for a block of size `bsize`
// coded under `partition`.
//
// Mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are looked
// up in has_tr_vert_tables[], which only covers bsize < BLOCK_SIZES; every
// other partition type uses has_tr_tables[]. The selected entry is asserted
// to be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_tr_tables[bsize];
  } else {
    assert(bsize < BLOCK_SIZES);
    table = has_tr_vert_tables[bsize];
  }

  assert(table);
  return table;
}
194
// Reports whether the top-right reference pixels of a transform block can be
// used for intra prediction.
//
//   sb_size, bsize     superblock size and size of the enclosing block
//   mi_row, mi_col     position of the enclosing block
//   top_available,
//   right_available    caller-supplied availability; if either is zero the
//                      answer is always 0
//   partition          partition type; selects which availability table is
//                      consulted (see get_has_tr_table())
//   txsz               transform size of the block being predicted
//   row_off, col_off   offset of the transform block inside the enclosing
//                      block, in the units of tx_size_wide_unit[] after the
//                      ss_x / ss_y shifts have been applied
//   ss_x, ss_y         right-shifts applied to widths/heights for this plane
//
// Returns 1 when the top-right pixels are available, 0 otherwise.
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!top_available || !right_available) return 0;

  const int blk_w_unit = AOMMAX(mi_size_wide[bsize] >> ss_x, 1);
  const int tx_w_unit = tx_size_wide_unit[txsz];

  if (row_off > 0) {
    // Below the first row of the block the pixels above are inside this same
    // block, so only the horizontal extent matters.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return col_off + tx_w_unit < blk_w_unit;
    }

    // Blocks wider than 64 pixels are handled in 64-wide columns.
    const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;

    // Special case: the transform unit whose top-right corner sits exactly at
    // the centre of the block does have pixels available at that corner.
    if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
        col_off + tx_w_unit == w64_unit) {
      return 1;
    }
    return (col_off % w64_unit) + tx_w_unit < w64_unit;
  }

  // First row of the block. If the top-right pixels still lie above this
  // block, they belong to the block above, which is already available.
  if (col_off + tx_w_unit < blk_w_unit) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int row_in_sb = (mi_row & sb_mask) >> h_log2;
  const int col_in_sb = (mi_col & sb_mask) >> w_log2;

  // Top row of the superblock: the pixels come from the top and/or top-right
  // superblocks, both already available.
  if (row_in_sb == 0) return 1;

  // Rightmost column of the superblock (and not the top row): the pixels
  // would come from the superblock to the right, which is not available yet.
  if (((col_in_sb + 1) << w_log2) >= sb_mi) return 0;

  // General case: look up whether the top-right block is coded before the
  // current one. One bit per block, eight blocks per table byte.
  const int blk_index =
      (row_in_sb << (MAX_MIB_SIZE_LOG2 - w_log2)) + col_in_sb;
  const int byte_pos = blk_index / 8;
  const int bit_pos = blk_index % 8;
  const uint8_t *table = get_has_tr_table(partition, bsize);
  return (table[byte_pos] >> bit_pos) & 1;
}
250
251 // Similar to the has_tr_* tables, but store if the bottom-left reference
252 // pixels are available.
// One bit-packed table per block size (suffix is WIDTHxHEIGHT). The tables
// are declared const because they are lookup data that is only ever read,
// always through the const pointers stored in has_bl_tables[] and
// has_bl_vert_tables[].
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
// Tables for the 1:4 and 4:1 block shapes.
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
// Bottom-left availability table for every block size. get_has_bl_table()
// indexes this array with bsize for every partition type other than
// PARTITION_VERT_A / PARTITION_VERT_B. The comment above each row lists the
// block sizes its entries correspond to, in order.
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_bl_4x4,
  // 4X8,         8X4,         8X8
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
  // 8X16,        16X8,        16X16
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
  // 16X32,       32X16,       32X32
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
  // 32X64,       64X32,       64X64
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
  // 64x128,      128x64,      128x128
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
  // 4x16,        16x4,        8x32
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
  // 32x8,        16x64,       64x16
  has_bl_32x8, has_bl_16x64, has_bl_64x16
};
331
// Bottom-left availability bitmasks for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B; selected through has_bl_vert_tables[].
// Declared const because they are read-only lookup data.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
// Indexed by bsize in get_has_bl_table() when the partition is
// PARTITION_VERT_A or PARTITION_VERT_B. Horizontal rectangles and 4X4 have no
// entry (NULL); get_has_bl_table() asserts that the selected entry is
// non-NULL.
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8,     8X4,         8X8
  has_bl_4x8, NULL, has_bl_vert_8x8,
  // 8X16,    16X8,        16X16
  has_bl_8x16, NULL, has_bl_vert_16x16,
  // 16X32,   32X16,       32X32
  has_bl_16x32, NULL, has_bl_vert_32x32,
  // 32X64,   64X32,       64X64
  has_bl_32x64, NULL, has_bl_vert_64x64,
  // 64x128,  128x64,      128x128
  has_bl_64x128, NULL, has_bl_128x128
};
365
// Returns the bottom-left availability bit table for a block of size `bsize`
// coded under `partition`.
//
// Mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) are looked
// up in has_bl_vert_tables[], which only covers bsize < BLOCK_SIZES; every
// other partition type uses has_bl_tables[]. The selected entry is asserted
// to be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_bl_tables[bsize];
  } else {
    assert(bsize < BLOCK_SIZES);
    table = has_bl_vert_tables[bsize];
  }

  assert(table);
  return table;
}
379
// Reports whether the bottom-left reference pixels of a transform block can
// be used for intra prediction.
//
//   sb_size, bsize     superblock size and size of the enclosing block
//   mi_row, mi_col     position of the enclosing block
//   bottom_available,
//   left_available     caller-supplied availability; if either is zero the
//                      answer is always 0
//   partition          partition type; selects which availability table is
//                      consulted (see get_has_bl_table())
//   txsz               transform size of the block being predicted
//   row_off, col_off   offset of the transform block inside the enclosing
//                      block, in the units of tx_size_high_unit[] after the
//                      ss_x / ss_y shifts have been applied
//   ss_x, ss_y         right-shifts applied to widths/heights for this plane
//
// Returns 1 when the bottom-left pixels are available, 0 otherwise.
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!bottom_available || !left_available) return 0;

  if (col_off > 0) {
    // Special case for blocks wider than 64 pixels when col_off is a multiple
    // of the 64-wide column width: such blocks are divided into 64x* blocks
    // in raster order, so the column to the left may already be coded.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int w64_unit = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % w64_unit == 0) {
        // Left edge of the top-right or bottom-right 64x* block: available
        // only if every bottom-left pixel is inside the left 64x* block.
        const int h64_unit = mi_size_high[BLOCK_64X64] >> ss_y;
        const int row_in_64 = row_off % h64_unit;
        const int h_limit = AOMMIN(mi_size_high[bsize] >> ss_y, h64_unit);
        return row_in_64 + tx_size_high_unit[txsz] < h_limit;
      }
    }
    // Otherwise the pixels are in the bottom-left block, which is not
    // available.
    return 0;
  }

  const int blk_h_unit = AOMMAX(mi_size_high[bsize] >> ss_y, 1);
  const int tx_h_unit = tx_size_high_unit[txsz];

  // All bottom-left pixels are in the left block, which is already available.
  if (row_off + tx_h_unit < blk_h_unit) return 1;

  const int w_log2 = mi_size_wide_log2[bsize];
  const int h_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[sb_size];
  const int sb_mask = sb_mi - 1;
  const int row_in_sb = (mi_row & sb_mask) >> h_log2;
  const int col_in_sb = (mi_col & sb_mask) >> w_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblocks, but only the left one is available, so check
  // that every required pixel falls inside it.
  if (col_in_sb == 0) {
    const int blk_first_row = (row_in_sb << h_log2) >> ss_y;
    const int sb_h_unit = sb_mi >> ss_y;
    return blk_first_row + row_off + tx_h_unit < sb_h_unit;
  }

  // Bottom row of the superblock (and not the leftmost column): the pixels
  // fall in the superblock below, which is not available yet.
  if (((row_in_sb + 1) << h_log2) >= sb_mi) return 0;

  // General case: look up whether the bottom-left block is coded before the
  // current one. One bit per block, eight blocks per table byte.
  const int blk_index =
      (row_in_sb << (MAX_MIB_SIZE_LOG2 - w_log2)) + col_in_sb;
  const int byte_pos = blk_index / 8;
  const int bit_pos = blk_index % 8;
  const uint8_t *table = get_has_bl_table(partition, bsize);
  return (table[byte_pos] >> bit_pos) & 1;
}
448
// Signature of an 8-bit intra predictor: writes the prediction to `dst`
// (rows `stride` apart) from the `above` and `left` reference pixels.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// Predictor dispatch tables, filled in by init_intra_predictors_internal().
// pred[mode][tx_size] holds the predictor for a mode and transform size.
// dc_pred[a][b][tx_size] holds the DC variants: [0][0] is dc_128, [0][1] is
// dc_top, [1][0] is dc_left and [1][1] is the full dc predictor.
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// High bit depth counterparts: 16-bit samples plus the bit depth `bd`.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
462
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466 #define INIT_RECTANGULAR(p, type) \
467 p[TX_4X8] = aom_##type##_predictor_4x8; \
468 p[TX_8X4] = aom_##type##_predictor_8x4; \
469 p[TX_8X16] = aom_##type##_predictor_8x16; \
470 p[TX_16X8] = aom_##type##_predictor_16x8; \
471 p[TX_16X32] = aom_##type##_predictor_16x32; \
472 p[TX_32X16] = aom_##type##_predictor_32x16; \
473 p[TX_32X64] = aom_##type##_predictor_32x64; \
474 p[TX_64X32] = aom_##type##_predictor_64x32; \
475 p[TX_4X16] = aom_##type##_predictor_4x16; \
476 p[TX_16X4] = aom_##type##_predictor_16x4; \
477 p[TX_8X32] = aom_##type##_predictor_8x32; \
478 p[TX_32X8] = aom_##type##_predictor_32x8; \
479 p[TX_16X64] = aom_##type##_predictor_16x64; \
480 p[TX_64X16] = aom_##type##_predictor_64x16;
481
482 #define INIT_NO_4X4(p, type) \
483 p[TX_8X8] = aom_##type##_predictor_8x8; \
484 p[TX_16X16] = aom_##type##_predictor_16x16; \
485 p[TX_32X32] = aom_##type##_predictor_32x32; \
486 p[TX_64X64] = aom_##type##_predictor_64x64; \
487 INIT_RECTANGULAR(p, type)
488
489 #define INIT_ALL_SIZES(p, type) \
490 p[TX_4X4] = aom_##type##_predictor_4x4; \
491 INIT_NO_4X4(p, type)
492
493 INIT_ALL_SIZES(pred[V_PRED], v)
494 INIT_ALL_SIZES(pred[H_PRED], h)
495 INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
496 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
497 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
498 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
499 INIT_ALL_SIZES(dc_pred[0][0], dc_128)
500 INIT_ALL_SIZES(dc_pred[0][1], dc_top)
501 INIT_ALL_SIZES(dc_pred[1][0], dc_left)
502 INIT_ALL_SIZES(dc_pred[1][1], dc)
503 #if CONFIG_AV1_HIGHBITDEPTH
504 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
505 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
506 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
507 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
508 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
509 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
510 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
511 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
512 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
513 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
514 #endif
515 #undef intra_pred_allsizes
516 }
517
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into `dst` (rows `stride` apart) using only the
// `above` edge. Row r samples the edge at position (r + 1) * dx, expressed in
// 1/64 units; each output blends two neighbouring edge samples with 5-bit
// weights. `upsample_above` (0 or 1) tells that the edge holds twice as many
// samples. Positions at or beyond the last usable index, ((bw + bh) - 1)
// << upsample_above, are filled with the edge sample at that index.
// `left` is unused and `dy` must be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int idx_step = 1 << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int last_idx = ((bw + bh) - 1) << upsample_above;

  uint8_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> pos_shift;

    if (idx >= last_idx) {
      // This row and every later one start past the end of the edge: flood
      // the rest of the block with the last edge sample.
      for (int rest = r; rest < bh; ++rest, row += stride) {
        memset(row, above[last_idx], bw * sizeof(row[0]));
      }
      return;
    }

    // Fractional part of the position, reduced to a 5-bit blend weight.
    const int w_next = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    for (int c = 0; c < bw; ++c, idx += idx_step) {
      if (idx >= last_idx) {
        row[c] = above[last_idx];
        continue;
      }
      const int sum = above[idx] * w_cur + above[idx + 1] * w_next;
      row[c] = (uint8_t)((sum + 16) >> 5);  // round to nearest, 5 fraction bits
    }
  }
}
555
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into `dst` (rows `stride` apart). Each pixel is
// first projected onto the `above` edge at (c << 6) - (r + 1) * dx; if that
// lands left of the first usable above index, -(1 << upsample_above), the
// pixel is projected onto the `left` edge at (r << 6) - (c + 1) * dy
// instead. Either way the result blends two neighbouring edge samples with
// 5-bit weights. Both edges are read at negative indices, so `above[-1]` and
// `left[-1]` (and one sample further when the edge is upsampled) must be
// readable.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int first_above_idx = -(1 << upsample_above);
  const int first_left_idx = -(1 << upsample_left);
  (void)first_left_idx;  // only referenced by the assert below
  const int above_shift = 6 - upsample_above;
  const int left_shift = 6 - upsample_left;

  uint8_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      // Start by assuming the pixel is predicted from the above edge.
      const uint8_t *edge = above;
      int upsample = upsample_above;
      int pos = (c << 6) - (r + 1) * dx;

      if ((pos >> above_shift) < first_above_idx) {
        // Projection falls off the above edge: use the left edge instead.
        edge = left;
        upsample = upsample_left;
        pos = (r << 6) - (c + 1) * dy;
        assert((pos >> left_shift) >= first_left_idx);
      }

      const int idx = pos >> (6 - upsample);
      // Multiply rather than shift: `pos` may be negative.
      const int w_next = ((pos * (1 << upsample)) & 0x3F) >> 1;
      const int sum = edge[idx] * (32 - w_next) + edge[idx + 1] * w_next;
      row[c] = (sum + 16) >> 5;  // round to nearest, 5 fraction bits
    }
  }
}
594
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into `dst` (rows `stride` apart) using only the
// `left` edge. Column c samples the edge at position (c + 1) * dy, expressed
// in 1/64 units; each output blends two neighbouring edge samples with 5-bit
// weights. `upsample_left` (0 or 1) tells that the edge holds twice as many
// samples. Once a column runs to or past the last usable index,
// (bw + bh - 1) << upsample_left, the rest of it is filled with the edge
// sample at that index. `above` is unused and `dx` must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int idx_step = 1 << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int last_idx = (bw + bh - 1) << upsample_left;

  int pos = dy;
  for (int c = 0; c < bw; ++c, pos += dy) {
    // Fractional part of the position, reduced to a 5-bit blend weight.
    const int w_next = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    int idx = pos >> pos_shift;
    uint8_t *px = dst + c;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && idx < last_idx; ++r, idx += idx_step, px += stride) {
      const int sum = left[idx] * w_cur + left[idx + 1] * w_next;
      *px = (sum + 16) >> 5;  // round to nearest, 5 fraction bits
    }
    // Remainder of the column lies past the end of the edge.
    for (; r < bh; ++r, px += stride) {
      *px = left[last_idx];
    }
  }
}
626
// Runs the 8-bit directional predictor for `angle` (degrees, asserted to be
// strictly between 0 and 270) on a block of transform size `tx_size`.
//
// Exactly 90 and 180 degrees use the plain V_PRED / H_PRED predictors from
// pred[]; every other angle goes to the zone 1, 2 or 3 routine for the
// quadrant it lies in. An angle outside (0, 270) writes nothing when asserts
// are compiled out.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
651
652 #if CONFIG_AV1_HIGHBITDEPTH
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
//
// Predicts a bw x bh block into `dst` (rows `stride` apart) using only the
// `above` edge. Row r samples the edge at position (r + 1) * dx, expressed in
// 1/64 units; each output blends two neighbouring edge samples with 5-bit
// weights. `upsample_above` (0 or 1) tells that the edge holds twice as many
// samples. Positions at or beyond the last usable index, ((bw + bh) - 1)
// << upsample_above, are filled with the edge sample at that index.
// `left` and `bd` are unused and `dy` must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int idx_step = 1 << upsample_above;
  const int pos_shift = 6 - upsample_above;
  const int last_idx = ((bw + bh) - 1) << upsample_above;

  uint16_t *row = dst;
  int pos = dx;
  for (int r = 0; r < bh; ++r, row += stride, pos += dx) {
    int idx = pos >> pos_shift;

    if (idx >= last_idx) {
      // This row and every later one start past the end of the edge: flood
      // the rest of the block with the last edge sample.
      for (int rest = r; rest < bh; ++rest, row += stride) {
        aom_memset16(row, above[last_idx], bw);
      }
      return;
    }

    // Fractional part of the position, reduced to a 5-bit blend weight.
    const int w_next = ((pos << upsample_above) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;

    for (int c = 0; c < bw; ++c, idx += idx_step) {
      if (idx >= last_idx) {
        row[c] = above[last_idx];
        continue;
      }
      const int sum = above[idx] * w_cur + above[idx + 1] * w_next;
      row[c] = ROUND_POWER_OF_TWO(sum, 5);
    }
  }
}
692
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
//
// Predicts a bw x bh block into `dst` (rows `stride` apart). Each pixel is
// first projected onto the `above` edge at (c << 6) - (r + 1) * dx; if that
// lands left of the first usable above index, -(1 << upsample_above), the
// pixel is projected onto the `left` edge at (r << 6) - (c + 1) * dy
// instead. Either way the result blends two neighbouring edge samples with
// 5-bit weights. Both edges are read at negative indices, so `above[-1]` and
// `left[-1]` (and one sample further when the edge is upsampled) must be
// readable. `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int first_above_idx = -(1 << upsample_above);
  const int first_left_idx = -(1 << upsample_left);
  (void)first_left_idx;  // only referenced by the assert below
  const int above_shift = 6 - upsample_above;
  const int left_shift = 6 - upsample_left;

  uint16_t *row = dst;
  for (int r = 0; r < bh; ++r, row += stride) {
    for (int c = 0; c < bw; ++c) {
      // Start by assuming the pixel is predicted from the above edge.
      const uint16_t *edge = above;
      int upsample = upsample_above;
      int pos = (c << 6) - (r + 1) * dx;

      if ((pos >> above_shift) < first_above_idx) {
        // Projection falls off the above edge: use the left edge instead.
        edge = left;
        upsample = upsample_left;
        pos = (r << 6) - (c + 1) * dy;
        assert((pos >> left_shift) >= first_left_idx);
      }

      const int idx = pos >> (6 - upsample);
      // Multiply rather than shift: `pos` may be negative.
      const int w_next = ((pos * (1 << upsample)) & 0x3F) >> 1;
      const int sum = edge[idx] * (32 - w_next) + edge[idx + 1] * w_next;
      row[c] = (sum + 16) >> 5;  // round to nearest, 5 fraction bits
    }
  }
}
732
// Directional prediction, zone 3: 180 < angle < 270 (16-bit samples).
//
// Predicts a bw x bh block into `dst` (rows `stride` apart) using only the
// `left` edge. Column c samples the edge at position (c + 1) * dy, expressed
// in 1/64 units; each output blends two neighbouring edge samples with 5-bit
// weights. `upsample_left` (0 or 1) tells that the edge holds twice as many
// samples. Once a column runs to or past the last usable index,
// (bw + bh - 1) << upsample_left, the rest of it is filled with the edge
// sample at that index. `above` and `bd` are unused and `dx` must be 1.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int idx_step = 1 << upsample_left;
  const int pos_shift = 6 - upsample_left;
  const int last_idx = (bw + bh - 1) << upsample_left;

  int pos = dy;
  for (int c = 0; c < bw; ++c, pos += dy) {
    // Fractional part of the position, reduced to a 5-bit blend weight.
    const int w_next = ((pos << upsample_left) & 0x3F) >> 1;
    const int w_cur = 32 - w_next;
    int idx = pos >> pos_shift;
    uint16_t *px = dst + c;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && idx < last_idx; ++r, idx += idx_step, px += stride) {
      const int sum = left[idx] * w_cur + left[idx + 1] * w_next;
      *px = (sum + 16) >> 5;  // round to nearest, 5 fraction bits
    }
    // Remainder of the column lies past the end of the edge.
    for (; r < bh; ++r, px += stride) {
      *px = left[last_idx];
    }
  }
}
765
// Runs the high bit depth directional predictor for `angle` (degrees,
// asserted to be strictly between 0 and 270) on a block of transform size
// `tx_size`.
//
// Exactly 90 and 180 degrees use the plain V_PRED / H_PRED predictors from
// pred_high[]; every other angle goes to the zone 1, 2 or 3 routine for the
// quadrant it lies in. An angle outside (0, 270) writes nothing when asserts
// are compiled out. `bd` is passed through to the selected predictor.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle <= 0 || angle >= 270) return;

  if (angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
791 #endif // CONFIG_AV1_HIGHBITDEPTH
792
// Signed filter taps for filter-intra prediction: one 8x8 group per
// filter-intra mode (five groups are listed). Within every row the first
// seven taps sum to 16 and the eighth is 0.
// NOTE(review): the loop that consumes this table lies beyond this section;
// presumably each row holds the weights for one output pixel and the eighth
// column is padding -- confirm against av1_filter_intra_predictor_c().
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
846
// 8-bit filter-intra (recursive) predictor.
//
// dst/stride : destination block, stride in pixels.
// tx_size    : transform size; width and height must each be <= 32.
// above      : row above the block; above[-1] (the top-left pixel) is read.
// left       : column left of the block.
// mode       : first index into av1_filter_intra_taps.
//
// The block is built in a (bh + 1) x (bw + 1) scratch grid whose first row and
// first column hold the edge pixels. It is filled in 4x2 patches, in raster
// order, so that each patch can use the already-predicted pixels of the patch
// to its left and the patches above it as neighbours.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint8_t grid[33][33];

  assert(bw <= 32 && bh <= 32);

  // Seed the scratch grid: top row (including the corner) and left column.
  memcpy(grid[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) grid[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Snapshot the seven neighbours before any pixel of this patch is
      // written.
      const uint8_t nb[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int acc = 0;
        for (int t = 0; t < 7; ++t) acc += taps[t] * nb[t];
        // Section 7.11.2.3 of the spec writes this as
        // Clip1( Round2Signed( acc, INTRA_FILTER_SCALE_BITS ) ). Clip1() maps
        // every negative value to 0, so plain Round2() gives the same result.
        grid[row + (k >> 2)][col + (k & 0x03)] =
            clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  // Copy the predicted interior (everything but the edge row/column) out.
  for (int row = 1; row <= bh; ++row) {
    memcpy(dst, &grid[row][1], bw * sizeof(uint8_t));
    dst += stride;
  }
}
893
894 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth filter-intra (recursive) predictor.
//
// dst/stride : destination block (16-bit samples), stride in samples.
// tx_size    : transform size; width and height must each be <= 32.
// above      : row above the block; above[-1] (the top-left pixel) is read.
// left       : column left of the block.
// mode       : first index into av1_filter_intra_taps.
// bd         : bit depth handed to clip_pixel_highbd().
//
// Works exactly like the 8-bit version: a (bh + 1) x (bw + 1) scratch grid is
// seeded with the edges and then filled in 4x2 patches in raster order.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint16_t grid[33][33];

  assert(bw <= 32 && bh <= 32);

  // Seed the scratch grid: top row (including the corner) and left column.
  memcpy(grid[0], &above[-1], (bw + 1) * sizeof(grid[0][0]));
  for (int row = 0; row < bh; ++row) grid[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Snapshot the seven neighbours before any pixel of this patch is
      // written.
      const uint16_t nb[7] = {
        grid[row - 1][col - 1], grid[row - 1][col],     grid[row - 1][col + 1],
        grid[row - 1][col + 2], grid[row - 1][col + 3], grid[row][col - 1],
        grid[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int acc = 0;
        for (int t = 0; t < 7; ++t) acc += taps[t] * nb[t];
        // Section 7.11.2.3 of the spec writes this as
        // Clip1( Round2Signed( acc, INTRA_FILTER_SCALE_BITS ) ). Clip1() maps
        // every negative value to 0, so plain Round2() gives the same result.
        grid[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO(acc, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  // Copy the predicted interior (everything but the edge row/column) out.
  for (int row = 1; row <= bh; ++row) {
    memcpy(dst, &grid[row][1], bw * sizeof(dst[0]));
    dst += stride;
  }
}
943 #endif // CONFIG_AV1_HIGHBITDEPTH
944
is_smooth(const MB_MODE_INFO * mbmi,int plane)945 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
946 if (plane == 0) {
947 const PREDICTION_MODE mode = mbmi->mode;
948 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
949 mode == SMOOTH_H_PRED);
950 } else {
951 // uv_mode is not set for inter blocks, so need to explicitly
952 // detect that case.
953 if (is_inter_block(mbmi)) return 0;
954
955 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
956 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
957 uv_mode == UV_SMOOTH_H_PRED);
958 }
959 }
960
get_intra_edge_filter_type(const MACROBLOCKD * xd,int plane)961 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
962 int ab_sm, le_sm;
963
964 if (plane == 0) {
965 const MB_MODE_INFO *ab = xd->above_mbmi;
966 const MB_MODE_INFO *le = xd->left_mbmi;
967 ab_sm = ab ? is_smooth(ab, plane) : 0;
968 le_sm = le ? is_smooth(le, plane) : 0;
969 } else {
970 const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
971 const MB_MODE_INFO *le = xd->chroma_left_mbmi;
972 ab_sm = ab ? is_smooth(ab, plane) : 0;
973 le_sm = le ? is_smooth(le, plane) : 0;
974 }
975
976 return (ab_sm || le_sm) ? 1 : 0;
977 }
978
// Picks the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0, bs1 : the two block dimensions; only their sum is used.
// delta    : signed angle offset; only its magnitude is used.
// type     : edge filter type; 0 selects the first threshold table, any other
//            value the second.
//
// Larger blocks and larger angle offsets yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type == 0) {
    if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
    // Sums in (8, 12] and (12, 16] share the same threshold.
    if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
    if (size_sum <= 24) {
      if (mag >= 32) return 3;
      if (mag >= 16) return 2;
      return (mag >= 8) ? 1 : 0;
    }
    if (size_sum <= 32) {
      if (mag >= 32) return 3;
      if (mag >= 4) return 2;
      return (mag >= 1) ? 1 : 0;
    }
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) {
    if (mag >= 64) return 2;
    return (mag >= 40) ? 1 : 0;
  }
  if (size_sum <= 16) {
    if (mag >= 48) return 2;
    return (mag >= 20) ? 1 : 0;
  }
  if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
  return (mag >= 1) ? 3 : 0;
}
1017
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1018 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1019 if (!strength) return;
1020
1021 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1022 { 0, 5, 6, 5, 0 },
1023 { 2, 4, 4, 4, 2 } };
1024 const int filt = strength - 1;
1025 uint8_t edge[129];
1026
1027 memcpy(edge, p, sz * sizeof(*p));
1028 for (int i = 1; i < sz; i++) {
1029 int s = 0;
1030 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1031 int k = i - 2 + j;
1032 k = (k < 0) ? 0 : k;
1033 k = (k > sz - 1) ? sz - 1 : k;
1034 s += edge[k] * kernel[filt][j];
1035 }
1036 s = (s + 8) >> 4;
1037 p[i] = s;
1038 }
1039 }
1040
// Smooths the top-left corner pixel shared by the above and left edges.
//
// The new corner value is the {5, 6, 5} / 16 weighted average (with rounding)
// of left[0], the current corner above[-1], and above[0]. It is stored to
// both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int outer_sum = p_left[0] + p_above[0];
  const int weighted = 5 * outer_sum + 6 * p_above[-1];
  const uint8_t corner = (uint8_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1050
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1051 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1052 if (!strength) return;
1053
1054 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1055 { 0, 5, 6, 5, 0 },
1056 { 2, 4, 4, 4, 2 } };
1057 const int filt = strength - 1;
1058 uint16_t edge[129];
1059
1060 memcpy(edge, p, sz * sizeof(*p));
1061 for (int i = 1; i < sz; i++) {
1062 int s = 0;
1063 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1064 int k = i - 2 + j;
1065 k = (k < 0) ? 0 : k;
1066 k = (k > sz - 1) ? sz - 1 : k;
1067 s += edge[k] * kernel[filt][j];
1068 }
1069 s = (s + 8) >> 4;
1070 p[i] = s;
1071 }
1072 }
1073
1074 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of filter_intra_edge_corner().
//
// The new corner value is the {5, 6, 5} / 16 weighted average (with rounding)
// of left[0], the current corner above[-1], and above[0]. It is stored to
// both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int outer_sum = p_left[0] + p_above[0];
  const int weighted = 5 * outer_sum + 6 * p_above[-1];
  const uint16_t corner = (uint16_t)((weighted + 8) >> 4);

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1084 #endif
1085
av1_upsample_intra_edge_c(uint8_t * p,int sz)1086 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1087 // interpolate half-sample positions
1088 assert(sz <= MAX_UPSAMPLE_SZ);
1089
1090 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1091 // copy p[-1..(sz-1)] and extend first and last samples
1092 in[0] = p[-1];
1093 in[1] = p[-1];
1094 for (int i = 0; i < sz; i++) {
1095 in[i + 2] = p[i];
1096 }
1097 in[sz + 2] = p[sz - 1];
1098
1099 // interpolate half-sample edge positions
1100 p[-2] = in[0];
1101 for (int i = 0; i < sz; i++) {
1102 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1103 s = clip_pixel((s + 8) >> 4);
1104 p[2 * i - 1] = s;
1105 p[2 * i] = in[i + 2];
1106 }
1107 }
1108
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1109 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1110 // interpolate half-sample positions
1111 assert(sz <= MAX_UPSAMPLE_SZ);
1112
1113 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1114 // copy p[-1..(sz-1)] and extend first and last samples
1115 in[0] = p[-1];
1116 in[1] = p[-1];
1117 for (int i = 0; i < sz; i++) {
1118 in[i + 2] = p[i];
1119 }
1120 in[sz + 2] = p[sz - 1];
1121
1122 // interpolate half-sample edge positions
1123 p[-2] = in[0];
1124 for (int i = 0; i < sz; i++) {
1125 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1126 s = (s + 8) >> 4;
1127 s = clip_pixel_highbd(s, bd);
1128 p[2 * i - 1] = s;
1129 p[2 * i] = in[i + 2];
1130 }
1131 }
1132 #if CONFIG_AV1_HIGHBITDEPTH
// Builds one high-bitdepth intra prediction block.
//
// ref8 / ref_stride      : reconstructed frame at the block's top-left pixel
//                          (CONVERT_TO_SHORTPTR-style pointer).
// dst8 / dst_stride      : where the prediction is written (same convention).
// mode, p_angle          : prediction mode and, for directional modes, the
//                          final prediction angle.
// filter_intra_mode      : FILTER_INTRA_MODES means "filter intra not used".
// tx_size                : transform (prediction) block size.
// disable_edge_filter    : non-zero skips edge filtering and upsampling.
// n_top_px, n_left_px    : number of available pixels above / left (>= 0).
// n_topright_px,
// n_bottomleft_px        : -1 = not needed, 0 = needed but unavailable,
//                          > 0 = number of available pixels.
// intra_edge_filter_type : passed to the strength / upsample decisions.
// bit_depth              : sample bit depth.
//
// The function gathers the above row and left column into local buffers
// (substituting defaults for unavailable pixels), optionally filters and
// upsamples them, and then calls the predictor for the mode.
static void build_intra_predictors_high(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
    int bit_depth) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Both edges start 16 entries into their buffer so that negative indices
  // (the corner, and the extra sample written by upsampling) stay in bounds.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const uint16_t *const above_ref = ref - ref_stride;
  const uint16_t *const left_ref = ref - 1;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  const int base = 128 << (bit_depth - 8);
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;

  // Give every entry of both buffers a defined value (base + 1 on the left,
  // base - 1 above). Uninitialized reads from these buffers inside the intra
  // pred modules (e.g. the width = 4 path of
  // av1_highbd_dr_prediction_z2_avx2()) were seen as the likely cause of
  // intermittent valgrind errors.
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);

  // Default layout when reference pixels are unavailable:
  //   base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  //   base+1   A      B  ..     Y      Z
  //   base+1   C      D  ..     W      X
  //   base+1   E      F  ..     U      V
  //   base+1   G      H  ..     S      T      T      T      T      T

  // Filter intra needs every edge; otherwise a directional mode derives its
  // needs from the angle instead of from the extend_modes table.
  if (use_filter_intra) {
    need_left = 1;
    need_above = 1;
    need_above_left = 1;
  } else if (is_dr_mode) {
    need_above = p_angle < 180;
    need_left = p_angle > 90;
    need_above_left = 1;
  }

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Only one edge is needed and that edge is unavailable: the prediction
  // degenerates to a flat block.
  const int left_only_missing = !need_above && n_left_px == 0;
  const int above_only_missing = !need_left && n_top_px == 0;
  if (left_only_missing || above_only_missing) {
    int fill;
    if (need_left) {
      fill = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      fill = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (int row = 0; row < txhpx; ++row, dst += dst_stride) {
      aom_memset16(dst, fill, txwpx);
    }
    return;
  }

  // Left column (plus bottom-left extension when requested).
  if (need_left) {
    const int wanted = txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    if (n_left_px > 0) {
      int filled = 0;
      while (filled < n_left_px) {
        left_col[filled] = left_ref[filled * ref_stride];
        ++filled;
      }
      if (n_bottomleft_px > 0) {
        assert(filled == txhpx);
        const int stop = txhpx + n_bottomleft_px;
        while (filled < stop) {
          left_col[filled] = left_ref[filled * ref_stride];
          ++filled;
        }
      }
      // Replicate the last available pixel over the remainder.
      if (filled < wanted) {
        aom_memset16(left_col + filled, left_col[filled - 1], wanted - filled);
      }
    } else if (n_top_px > 0) {
      aom_memset16(left_col, above_ref[0], wanted);
    }
  }

  // Above row (plus top-right extension when requested).
  if (need_above) {
    const int wanted = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      int filled = n_top_px;
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        filled += n_topright_px;
      }
      // Replicate the last available pixel over the remainder.
      if (filled < wanted) {
        aom_memset16(above_row + filled, above_row[filled - 1],
                     wanted - filled);
      }
    } else if (n_left_px > 0) {
      aom_memset16(above_row, left_ref[0], wanted);
    }
  }

  // Corner pixel, stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0) {
      above_row[-1] = (n_left_px > 0) ? above_ref[-1] : above_ref[0];
    } else {
      above_row[-1] = (n_left_px > 0) ? left_ref[0] : base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, bit_depth);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      // Extra samples past the block on each edge (top-right / bottom-left).
      const int above_extra = (p_angle < 90) ? txhpx : 0;
      const int left_extra = (p_angle > 180) ? txwpx : 0;

      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          av1_filter_intra_edge_high(above_row - ab_le,
                                     n_top_px + ab_le + above_extra, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          av1_filter_intra_edge_high(left_col - ab_le,
                                     n_left_px + ab_le + left_extra, strength);
        }
      }

      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        av1_upsample_intra_edge_high(above_row, txwpx + above_extra, bit_depth);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        av1_upsample_intra_edge_high(left_col, txhpx + left_extra, bit_depth);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, bit_depth);
    return;
  }

  // Non-directional modes. DC has variants keyed on edge availability.
  if (mode != DC_PRED) {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
  } else {
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, bit_depth);
  }
}
1307 #endif // CONFIG_AV1_HIGHBITDEPTH
1308
// Builds one 8-bit intra prediction block.
//
// ref / ref_stride       : reconstructed frame at the block's top-left pixel.
// dst / dst_stride       : where the prediction is written.
// mode, p_angle          : prediction mode and, for directional modes, the
//                          final prediction angle.
// filter_intra_mode      : FILTER_INTRA_MODES means "filter intra not used".
// tx_size                : transform (prediction) block size.
// disable_edge_filter    : non-zero skips edge filtering and upsampling.
// n_top_px, n_left_px    : number of available pixels above / left (>= 0).
// n_topright_px,
// n_bottomleft_px        : -1 = not needed, 0 = needed but unavailable,
//                          > 0 = number of available pixels.
// intra_edge_filter_type : passed to the strength / upsample decisions.
//
// The function gathers the above row and left column into local buffers
// (substituting defaults for unavailable pixels), optionally filters and
// upsamples them, and then calls the predictor for the mode.
static void build_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Both edges start 16 entries into their buffer so that negative indices
  // (the corner, and the extra sample written by upsampling) stay in bounds.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const uint8_t *const above_ref = ref - ref_stride;
  const uint8_t *const left_ref = ref - 1;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;

  // Give every entry of both buffers a defined value (129 on the left, 127
  // above). Uninitialized reads from these buffers inside the intra pred
  // modules (e.g. the width = 4 path of av1_dr_prediction_z1_avx2()) were seen
  // as the likely cause of intermittent valgrind errors.
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);

  // Default layout when reference pixels are unavailable:
  //   128 127 127 .. 127 127 127 127 127 127
  //   129  A   B  ..  Y   Z
  //   129  C   D  ..  W   X
  //   129  E   F  ..  U   V
  //   129  G   H  ..  S   T   T   T   T   T
  //   ..

  // Filter intra needs every edge; otherwise a directional mode derives its
  // needs from the angle instead of from the extend_modes table.
  if (use_filter_intra) {
    need_left = 1;
    need_above = 1;
    need_above_left = 1;
  } else if (is_dr_mode) {
    need_above = p_angle < 180;
    need_left = p_angle > 90;
    need_above_left = 1;
  }

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Only one edge is needed and that edge is unavailable: the prediction
  // degenerates to a flat block.
  const int left_only_missing = !need_above && n_left_px == 0;
  const int above_only_missing = !need_left && n_top_px == 0;
  if (left_only_missing || above_only_missing) {
    int fill;
    if (need_left) {
      fill = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      fill = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (int row = 0; row < txhpx; ++row, dst += dst_stride) {
      memset(dst, fill, txwpx);
    }
    return;
  }

  // Left column (plus bottom-left extension when requested).
  if (need_left) {
    const int wanted = txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    if (n_left_px > 0) {
      int filled = 0;
      while (filled < n_left_px) {
        left_col[filled] = left_ref[filled * ref_stride];
        ++filled;
      }
      if (n_bottomleft_px > 0) {
        assert(filled == txhpx);
        const int stop = txhpx + n_bottomleft_px;
        while (filled < stop) {
          left_col[filled] = left_ref[filled * ref_stride];
          ++filled;
        }
      }
      // Replicate the last available pixel over the remainder.
      if (filled < wanted) {
        memset(left_col + filled, left_col[filled - 1], wanted - filled);
      }
    } else if (n_top_px > 0) {
      memset(left_col, above_ref[0], wanted);
    }
  }

  // Above row (plus top-right extension when requested).
  if (need_above) {
    const int wanted = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      int filled = n_top_px;
      memcpy(above_row, above_ref, n_top_px);
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        filled += n_topright_px;
      }
      // Replicate the last available pixel over the remainder.
      if (filled < wanted) {
        memset(above_row + filled, above_row[filled - 1], wanted - filled);
      }
    } else if (n_left_px > 0) {
      memset(above_row, left_ref[0], wanted);
    }
  }

  // Corner pixel, stored at index -1 of both edges.
  if (need_above_left) {
    if (n_top_px > 0) {
      above_row[-1] = (n_left_px > 0) ? above_ref[-1] : above_ref[0];
    } else {
      above_row[-1] = (n_left_px > 0) ? left_ref[0] : 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      // Extra samples past the block on each edge (top-right / bottom-left).
      const int above_extra = (p_angle < 90) ? txhpx : 0;
      const int left_extra = (p_angle > 180) ? txwpx : 0;

      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          av1_filter_intra_edge(above_row - ab_le,
                                n_top_px + ab_le + above_extra, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          av1_filter_intra_edge(left_col - ab_le,
                                n_left_px + ab_le + left_extra, strength);
        }
      }

      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        av1_upsample_intra_edge(above_row, txwpx + above_extra);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        av1_upsample_intra_edge(left_col, txhpx + left_extra);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // Non-directional modes. DC has variants keyed on edge availability.
  if (mode != DC_PRED) {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  } else {
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  }
}
1478
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1479 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1480 int subsampling_y) {
1481 assert(subsampling_x >= 0 && subsampling_x < 2);
1482 assert(subsampling_y >= 0 && subsampling_y < 2);
1483 BLOCK_SIZE bs = bsize;
1484 switch (bsize) {
1485 case BLOCK_4X4:
1486 if (subsampling_x == 1 && subsampling_y == 1)
1487 bs = BLOCK_8X8;
1488 else if (subsampling_x == 1)
1489 bs = BLOCK_8X4;
1490 else if (subsampling_y == 1)
1491 bs = BLOCK_4X8;
1492 break;
1493 case BLOCK_4X8:
1494 if (subsampling_x == 1 && subsampling_y == 1)
1495 bs = BLOCK_8X8;
1496 else if (subsampling_x == 1)
1497 bs = BLOCK_8X8;
1498 else if (subsampling_y == 1)
1499 bs = BLOCK_4X8;
1500 break;
1501 case BLOCK_8X4:
1502 if (subsampling_x == 1 && subsampling_y == 1)
1503 bs = BLOCK_8X8;
1504 else if (subsampling_x == 1)
1505 bs = BLOCK_8X4;
1506 else if (subsampling_y == 1)
1507 bs = BLOCK_8X8;
1508 break;
1509 case BLOCK_4X16:
1510 if (subsampling_x == 1 && subsampling_y == 1)
1511 bs = BLOCK_8X16;
1512 else if (subsampling_x == 1)
1513 bs = BLOCK_8X16;
1514 else if (subsampling_y == 1)
1515 bs = BLOCK_4X16;
1516 break;
1517 case BLOCK_16X4:
1518 if (subsampling_x == 1 && subsampling_y == 1)
1519 bs = BLOCK_16X8;
1520 else if (subsampling_x == 1)
1521 bs = BLOCK_16X4;
1522 else if (subsampling_y == 1)
1523 bs = BLOCK_16X8;
1524 break;
1525 default: break;
1526 }
1527 return bs;
1528 }
1529
// Produces the intra prediction for one transform block.
//
// xd                       : macroblock descriptor (neighbour availability,
//                            frame / tile edges, palette data, bit depth).
// sb_size                  : superblock size, forwarded to has_top_right() /
//                            has_bottom_left().
// enable_intra_edge_filter : zero disables edge filtering and upsampling.
// wpx, hpx                 : width / height of the plane block in pixels.
// tx_size                  : transform block size.
// mode, angle_delta        : prediction mode and angle offset (already
//                            scaled; added directly to the mode's angle).
// use_palette              : non-zero paints the block from the colour map.
// filter_intra_mode        : FILTER_INTRA_MODES means "no filter intra".
// ref / ref_stride         : reference pixels at the block's top-left.
// dst / dst_stride         : prediction output.
// col_off, row_off         : offset of the transform block inside the plane
//                            block, in MI units.
// plane                    : plane index.
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
                             int enable_intra_edge_filter, int wpx, int hpx,
                             TX_SIZE tx_size, PREDICTION_MODE mode,
                             int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode,
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
                             int dst_stride, int col_off, int row_off,
                             int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    // Palette blocks are a pure table lookup: colour index -> palette entry.
    const int is_chroma = (plane != 0);
    const uint8_t *const map = xd->plane[is_chroma].color_index_map +
                               xd->color_index_map_offset[is_chroma];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const map_row = map + (r + y) * wpx + x;
        uint16_t *const out_row = dst16 + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) out_row[c] = palette[map_row[c]];
      }
    } else {
      for (int r = 0; r < txhpx; ++r) {
        const uint8_t *const map_row = map + (r + y) * wpx + x;
        uint8_t *const out_row = dst + r * dst_stride;
        for (int c = 0; c < txwpx; ++c) {
          out_row[c] = (uint8_t)palette[map_row[c]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // An edge inside the block (non-zero offset) is always available; at the
  // block boundary it depends on the neighbouring block.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Pixels between the right edge of this prediction block and the right
  // edge of the frame.
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
  // Pixels between the bottom edge of this prediction block and the bottom
  // edge of the frame.
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // Subsampled planes use the scaled-up size for blocks with a 4-sample side.
  const BLOCK_SIZE bsize = (ss_x || ss_y)
                               ? scale_chroma_bsize(mbmi->bsize, ss_x, ss_y)
                               : mbmi->bsize;

  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  int p_angle = 0;
  int need_top_right;
  int need_bottom_left;
  if (is_dr_mode) {
    // Directional modes decide from the final angle.
    p_angle = mode_to_angle_map[mode] + angle_delta;
    need_top_right = p_angle < 90;
    need_bottom_left = p_angle > 180;
  } else if (use_filter_intra) {
    need_top_right = 0;
    need_bottom_left = 0;
  } else {
    need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
    need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
  }

  // States of have_top_right (TR) and have_bottom_left (BL):
  //   -1 : not needed
  //    0 : needed but not available
  //  > 0 : needed and available
  const int have_top_right =
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
                                     right_available, partition, tx_size,
                                     row_off, col_off, ss_x, ss_y)
                     : -1;
  const int have_bottom_left =
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
                                         bottom_available, have_left, partition,
                                         tx_size, row_off, col_off, ss_x, ss_y)
                       : -1;

  const int disable_edge_filter = !enable_intra_edge_filter;
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);

  // Pixel counts handed to the builders, limited by the distance to the frame
  // edge. The -1 / 0 states of TR and BL are passed through unchanged.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px =
      (have_top_right > 0) ? AOMMIN(txwpx, xr) : have_top_right;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px =
      (have_bottom_left > 0) ? AOMMIN(txhpx, yd) : have_bottom_left;

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(ref, ref_stride, dst, dst_stride, mode, p_angle,
                                filter_intra_mode, tx_size, disable_edge_filter,
                                n_top_px, n_topright_px, n_left_px,
                                n_bottomleft_px, intra_edge_filter_type,
                                xd->bd);
    return;
  }
#endif
  build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, p_angle,
                         filter_intra_mode, tx_size, disable_edge_filter,
                         n_top_px, n_topright_px, n_left_px, n_bottomleft_px,
                         intra_edge_filter_type);
}
1652
// Convenience entry point: derives every av1_predict_intra_block() argument
// for the transform block at (blk_col, blk_row) of `plane` from the current
// block's mode info and the sequence header, and predicts in place (the
// destination buffer is also the reference).
//
// Chroma blocks coded with UV_CFL_PRED additionally run the CfL prediction on
// top of the DC prediction, which is either computed here or loaded from the
// CfL DC prediction cache.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const SequenceHeader *seq_params = cm->seq_params;
  const int is_luma = (plane == AOM_PLANE_Y);
  const int dst_stride = pd->dst.stride;
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
  const PREDICTION_MODE mode =
      is_luma ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;

  // Filter intra is only taken from the mode info for the luma plane.
  FILTER_INTRA_MODE filter_intra_mode = FILTER_INTRA_MODES;
  if (is_luma && mbmi->filter_intra_mode_info.use_filter_intra) {
    filter_intra_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
  }

  const int is_cfl = !is_luma && mbmi->uv_mode == UV_CFL_PRED;
  if (!is_cfl) {
    av1_predict_intra_block(
        xd, seq_params->sb_size, seq_params->enable_intra_edge_filter,
        pd->width, pd->height, tx_size, mode, angle_delta, use_palette,
        filter_intra_mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row,
        plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif
  CFL_CTX *const cfl = &xd->cfl;
  CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane] != 0) {
    // Reuse the DC prediction stored by an earlier call.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(
        xd, seq_params->sb_size, seq_params->enable_intra_edge_filter,
        pd->width, pd->height, tx_size, mode, angle_delta, use_palette,
        filter_intra_mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row,
        plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1707
av1_init_intra_predictors(void)1708 void av1_init_intra_predictors(void) {
1709 aom_once(init_intra_predictors_internal);
1710 }
1711