1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "av1/common/av1_common_int.h"
23 #include "av1/common/cfl.h"
24 #include "av1/common/reconintra.h"
25
// Bit flags describing which neighboring reference-pixel edges an intra
// prediction mode reads (see extend_modes below). Bit 0 is intentionally
// unused.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};

// Intra-edge filter parameters. NOTE(review): presumably the number of
// filter-strength kernels (3) and taps per kernel (5) of the AV1 intra edge
// filter -- confirm against the edge-filter code later in this file.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
// Maximum edge length eligible for 2x upsampling.
#define MAX_UPSAMPLE_SZ 16
// Upper bound on the number of neighbor reference pixels gathered per edge
// (two transform edges plus padding).
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
38
// For each intra prediction mode, the set of NEED_* flags naming the neighbor
// edges its predictor reads. Indexed by the mode's position in INTRA_MODES.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};
54
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//   . . . .
//   . . . .
//   . . o .
//   . . . .
// These tables are pure lookup data and are never written at runtime, so
// they are declared const (lets the linker place them in read-only storage).
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119,
  119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127, 119, 119,
  119, 119, 255, 255, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127, 127, 127,
  119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255, 127, 127, 127, 127,
  127, 255, 255, 255, 127, 127, 127, 127, 127, 255, 127, 255, 127, 127, 127,
  127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
// Maps each block size to its top-right availability table. Indexed by
// BLOCK_SIZE; each table is then indexed by the block's raster position
// within the superblock (see the packing comment above has_tr_4x4).
static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_tr_4x4,
  // 4X8, 8X4, 8X8
  has_tr_4x8, has_tr_8x4, has_tr_8x8,
  // 8X16, 16X8, 16X16
  has_tr_8x16, has_tr_16x8, has_tr_16x16,
  // 16X32, 32X16, 32X32
  has_tr_16x32, has_tr_32x16, has_tr_32x32,
  // 32X64, 64X32, 64X64
  has_tr_32x64, has_tr_64x32, has_tr_64x64,
  // 64x128, 128x64, 128x128
  has_tr_64x128, has_tr_128x64, has_tr_128x128,
  // 4x16, 16x4, 8x32
  has_tr_4x16, has_tr_16x4, has_tr_8x32,
  // 32x8, 16x64, 64x16
  has_tr_32x8, has_tr_16x64, has_tr_64x16
};
146
// Top-right availability for square blocks visited in mixed-vertical
// partition order (see the comment above has_tr_vert_tables). Read-only
// lookup data, hence const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
// The _vert_* tables are like the ordinary tables above, but describe the
// order we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal except for on the last
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
// as a pair of squares, which means that these tables work correctly for both
// mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table.
// NULL entries (horizontal rectangles) are never returned:
// get_has_tr_table() asserts the looked-up pointer is non-NULL.
static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8, 8X4, 8X8
  has_tr_4x8, NULL, has_tr_vert_8x8,
  // 8X16, 16X8, 16X16
  has_tr_8x16, NULL, has_tr_vert_16x16,
  // 16X32, 32X16, 32X32
  has_tr_16x32, NULL, has_tr_vert_32x32,
  // 32X64, 64X32, 64X64
  has_tr_32x64, NULL, has_tr_vert_64x64,
  // 64x128, 128x64, 128x128
  has_tr_64x128, NULL, has_tr_128x128
};
180
// Select the top-right availability table for this block size. Mixed
// vertical partitions (VERT_A / VERT_B) visit their square sub-blocks in a
// different order, so they use the dedicated _vert tables; everything else
// uses the ordinary per-size tables.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  const uint8_t *table;
  if (is_mixed_vert) {
    assert(bsize < BLOCK_SIZES);
    table = has_tr_vert_tables[bsize];
  } else {
    table = has_tr_tables[bsize];
  }
  assert(table);
  return table;
}
194
// Returns 1 if reconstructed pixels are available at the top-right corner of
// the transform block located at (row_off, col_off) -- offsets measured in tx
// units within the current prediction block of size 'bsize' -- 0 otherwise.
// ss_x/ss_y are the plane's chroma subsampling shifts; 'partition' selects
// the availability table for mixed vertical partitions.
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  // Without an above row and a right neighbor there can be no top-right.
  if (!top_available || !right_available) return 0;

  const int bw_unit = mi_size_wide[bsize];
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
  const int top_right_count_unit = tx_size_wide_unit[txsz];

  if (row_off > 0) {  // Just need to check if enough pixels on the right.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      // Special case: For 128x128 blocks, the transform unit whose
      // top-right corner is at the center of the block does in fact have
      // pixels available at its top-right corner.
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
        return 1;
      }
      // Otherwise treat the 128-wide block as a pair of 64-wide halves and
      // test against the current half's width.
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
      const int col_off_64 = col_off % plane_bw_unit_64;
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
    }
    return col_off + top_right_count_unit < plane_bw_unit;
  } else {
    // All top-right pixels are in the block above, which is already available.
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;

    // Position of this prediction block within its superblock, in units of
    // its own width/height.
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    const int sb_mi_size = mi_size_high[sb_size];
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;

    // Top row of superblock: so top-right pixels are in the top and/or
    // top-right superblocks, both of which are already available.
    if (blk_row_in_sb == 0) return 1;

    // Rightmost column of superblock (and not the top row): so top-right pixels
    // fall in the right superblock, which is not available yet.
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
      return 0;
    }

    // General case (neither top row nor rightmost column): check if the
    // top-right block is coded before the current block.
    // Raster index of this block within the superblock, used to look up the
    // precomputed availability bit (see the comment above has_tr_4x4).
    const int this_blk_index =
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
        blk_col_in_sb + 0;
    const int idx1 = this_blk_index / 8;
    const int idx2 = this_blk_index % 8;
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
    return (has_tr_table[idx1] >> idx2) & 1;
  }
}
250
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. Read-only lookup data, hence const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1,
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
// Maps each block size to its bottom-left availability table. Indexed by
// BLOCK_SIZE; each table is then indexed by the block's raster position
// within the superblock (same bit packing as the has_tr_* tables).
static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
  // 4X4
  has_bl_4x4,
  // 4X8, 8X4, 8X8
  has_bl_4x8, has_bl_8x4, has_bl_8x8,
  // 8X16, 16X8, 16X16
  has_bl_8x16, has_bl_16x8, has_bl_16x16,
  // 16X32, 32X16, 32X32
  has_bl_16x32, has_bl_32x16, has_bl_32x32,
  // 32X64, 64X32, 64X64
  has_bl_32x64, has_bl_64x32, has_bl_64x64,
  // 64x128, 128x64, 128x128
  has_bl_64x128, has_bl_128x64, has_bl_128x128,
  // 4x16, 16x4, 8x32
  has_bl_4x16, has_bl_16x4, has_bl_8x32,
  // 32x8, 16x64, 64x16
  has_bl_32x8, has_bl_16x64, has_bl_64x16
};
331
// Bottom-left availability for square blocks visited in mixed-vertical
// partition order (see the comment above has_bl_vert_tables). Read-only
// lookup data, hence const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
// The _vert_* tables are like the ordinary tables above, but describe the
// order we visit square blocks when doing a PARTITION_VERT_A or
// PARTITION_VERT_B. This is the same order as normal except for on the last
// split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
// as a pair of squares, which means that these tables work correctly for both
// mixed vertical partition types.
//
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table.
// NULL entries (horizontal rectangles) are never returned:
// get_has_bl_table() asserts the looked-up pointer is non-NULL.
static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
  // 4X4
  NULL,
  // 4X8, 8X4, 8X8
  has_bl_4x8, NULL, has_bl_vert_8x8,
  // 8X16, 16X8, 16X16
  has_bl_8x16, NULL, has_bl_vert_16x16,
  // 16X32, 32X16, 32X32
  has_bl_16x32, NULL, has_bl_vert_32x32,
  // 32X64, 64X32, 64X64
  has_bl_32x64, NULL, has_bl_vert_64x64,
  // 64x128, 128x64, 128x128
  has_bl_64x128, NULL, has_bl_128x128
};
365
// Select the bottom-left availability table for this block size. Mixed
// vertical partitions (VERT_A / VERT_B) visit their square sub-blocks in a
// different order, so they use the dedicated _vert tables; everything else
// uses the ordinary per-size tables.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  const uint8_t *table;
  if (is_mixed_vert) {
    assert(bsize < BLOCK_SIZES);
    table = has_bl_vert_tables[bsize];
  } else {
    table = has_bl_tables[bsize];
  }
  assert(table);
  return table;
}
379
// Returns 1 if reconstructed pixels are available at the bottom-left corner
// of the transform block located at (row_off, col_off) -- offsets measured in
// tx units within the current prediction block of size 'bsize' -- 0 otherwise.
// ss_x/ss_y are the plane's chroma subsampling shifts; 'partition' selects
// the availability table for mixed vertical partitions.
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!bottom_available || !left_available) return 0;

  // Special case for 128x* blocks, when col_off is half the block width.
  // This is needed because 128x* superblocks are divided into 64x* blocks in
  // raster order.
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int col_off_64 = col_off % plane_bw_unit_64;
    if (col_off_64 == 0) {
      // We are at the left edge of top-right or bottom-right 64x* block.
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
      const int row_off_64 = row_off % plane_bh_unit_64;
      const int plane_bh_unit =
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
      // Check if all bottom-left pixels are in the left 64x* block (which is
      // already coded).
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
    }
  }

  if (col_off > 0) {
    // Bottom-left pixels are in the bottom-left block, which is not available.
    return 0;
  } else {
    const int bh_unit = mi_size_high[bsize];
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
    const int bottom_left_count_unit = tx_size_high_unit[txsz];

    // All bottom-left pixels are in the left block, which is already available.
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;

    // Position of this prediction block within its superblock, in units of
    // its own width/height.
    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    const int sb_mi_size = mi_size_high[sb_size];
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;

    // Leftmost column of superblock: so bottom-left pixels maybe in the left
    // and/or bottom-left superblocks. But only the left superblock is
    // available, so check if all required pixels fall in that superblock.
    if (blk_col_in_sb == 0) {
      // Mi-unit row offset of this block's top edge within the superblock.
      // (Simplified from "<< (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2)":
      // the two MI_SIZE_LOG2 terms cancel.)
      const int blk_start_row_off = (blk_row_in_sb << bh_in_mi_log2) >> ss_y;
      const int row_off_in_sb = blk_start_row_off + row_off;
      const int sb_height_unit = sb_mi_size >> ss_y;
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
    }

    // Bottom row of superblock (and not the leftmost column): so bottom-left
    // pixels fall in the bottom superblock, which is not available yet.
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;

    // General case (neither leftmost column nor bottom row): check if the
    // bottom-left block is coded before the current block.
    // Raster index of this block within the superblock, used to look up the
    // precomputed availability bit (same packing as the has_tr_* tables).
    const int this_blk_index =
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
        blk_col_in_sb + 0;
    const int idx1 = this_blk_index / 8;
    const int idx2 = this_blk_index % 8;
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
    return (has_bl_table[idx1] >> idx2) & 1;
  }
}
448
// Signature shared by all 8-bit intra predictors: write the prediction into
// dst (stride elements per row) from the 'above' and 'left' reference lines.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// Predictor dispatch tables, filled in by init_intra_predictors_internal().
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
// DC variants indexed by [left available][above available]:
// [0][0]=dc_128, [0][1]=dc_top, [1][0]=dc_left, [1][1]=dc.
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth equivalents; 'bd' is the bit depth.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
462
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466 #if CONFIG_REALTIME_ONLY
467 #define INIT_RECTANGULAR(p, type) \
468 p[TX_4X8] = aom_##type##_predictor_4x8; \
469 p[TX_8X4] = aom_##type##_predictor_8x4; \
470 p[TX_8X16] = aom_##type##_predictor_8x16; \
471 p[TX_16X8] = aom_##type##_predictor_16x8; \
472 p[TX_16X32] = aom_##type##_predictor_16x32; \
473 p[TX_32X16] = aom_##type##_predictor_32x16; \
474 p[TX_32X64] = aom_##type##_predictor_32x64; \
475 p[TX_64X32] = aom_##type##_predictor_64x32;
476 #else
477 #define INIT_RECTANGULAR(p, type) \
478 p[TX_4X8] = aom_##type##_predictor_4x8; \
479 p[TX_8X4] = aom_##type##_predictor_8x4; \
480 p[TX_8X16] = aom_##type##_predictor_8x16; \
481 p[TX_16X8] = aom_##type##_predictor_16x8; \
482 p[TX_16X32] = aom_##type##_predictor_16x32; \
483 p[TX_32X16] = aom_##type##_predictor_32x16; \
484 p[TX_32X64] = aom_##type##_predictor_32x64; \
485 p[TX_64X32] = aom_##type##_predictor_64x32; \
486 p[TX_4X16] = aom_##type##_predictor_4x16; \
487 p[TX_16X4] = aom_##type##_predictor_16x4; \
488 p[TX_8X32] = aom_##type##_predictor_8x32; \
489 p[TX_32X8] = aom_##type##_predictor_32x8; \
490 p[TX_16X64] = aom_##type##_predictor_16x64; \
491 p[TX_64X16] = aom_##type##_predictor_64x16;
492 #endif
493
494 #define INIT_NO_4X4(p, type) \
495 p[TX_8X8] = aom_##type##_predictor_8x8; \
496 p[TX_16X16] = aom_##type##_predictor_16x16; \
497 p[TX_32X32] = aom_##type##_predictor_32x32; \
498 p[TX_64X64] = aom_##type##_predictor_64x64; \
499 INIT_RECTANGULAR(p, type)
500
501 #define INIT_ALL_SIZES(p, type) \
502 p[TX_4X4] = aom_##type##_predictor_4x4; \
503 INIT_NO_4X4(p, type)
504
505 INIT_ALL_SIZES(pred[V_PRED], v);
506 INIT_ALL_SIZES(pred[H_PRED], h);
507 INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
508 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
509 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
510 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
511 INIT_ALL_SIZES(dc_pred[0][0], dc_128);
512 INIT_ALL_SIZES(dc_pred[0][1], dc_top);
513 INIT_ALL_SIZES(dc_pred[1][0], dc_left);
514 INIT_ALL_SIZES(dc_pred[1][1], dc);
515 #if CONFIG_AV1_HIGHBITDEPTH
516 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
517 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
518 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
519 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
520 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
521 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
522 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
523 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
524 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
525 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
526 #endif
527 #undef intra_pred_allsizes
528 }
529
// Directional prediction, zone 1: 0 < angle < 90
// All reference pixels come from the 'above' row. For each row, step along
// the projection by dx, interpolating between adjacent 'above' samples; once
// the projection runs past the last sample, the remaining rows replicate it.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  int x = dx;
  for (int r = 0; r < bh; ++r, dst += stride, x += dx) {
    int base = x >> frac_bits;
    // 5-bit interpolation fraction derived from the 6-bit subpel position.
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // Projection is past the last reference sample: fill the rest of the
      // block with that sample and stop.
      for (int i = r; i < bh; ++i) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
567
// Directional prediction, zone 2: 90 < angle < 180
// Each output pixel is projected back along the prediction angle; pixels
// whose projection lands at or right of min_base_x interpolate from the
// 'above' row, the rest interpolate from the 'left' column.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // only used by the assert below
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      // Horizontal projection in 1/64-pel units.
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Interpolate between two 'above' samples with a 5-bit fraction.
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls left of the above row: use the left column.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
606
// Directional prediction, zone 3: 180 < angle < 270
// Mirror of zone 1: all reference pixels come from the 'left' column and the
// block is filled column by column; once the projection runs past the last
// sample, the rest of the column replicates it.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  int r, c, y, base, shift, val;

  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;
  y = dy;
  for (c = 0; c < bw; ++c, y += dy) {
    base = y >> frac_bits;
    // 5-bit interpolation fraction derived from the 6-bit subpel position.
    shift = ((y << upsample_left) & 0x3F) >> 1;

    for (r = 0; r < bh; ++r, base += base_inc) {
      if (base < max_base_y) {
        val = left[base] * (32 - shift) + left[base + 1] * shift;
        // Write the rounded value directly (the redundant "val =" of the
        // original double assignment served no purpose; this now matches
        // av1_highbd_dr_prediction_z3_c).
        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
    }
  }
}
638
// Dispatch an 8-bit directional prediction for 'angle' in (0, 270) degrees.
// Exactly 90/180 degrees reduce to the plain V/H predictors; all other
// angles go to one of the three zone kernels.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
663
#if CONFIG_AV1_HIGHBITDEPTH
// Directional prediction, zone 1: 0 < angle < 90
// High-bitdepth counterpart of av1_dr_prediction_z1_c: same algorithm on
// 16-bit samples ('bd' is unused by this C version).
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  int r, c, x, base, shift, val;

  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  x = dx;
  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
    base = x >> frac_bits;
    // 5-bit interpolation fraction derived from the 6-bit subpel position.
    shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // Projection is past the last reference sample: fill the remaining
      // rows with that sample and stop.
      for (int i = r; i < bh; ++i) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
704
// Directional prediction, zone 2: 90 < angle < 180
// High-bitdepth counterpart of av1_dr_prediction_z2_c: each pixel projects
// back along the angle and interpolates from 'above' or 'left' ('bd' is
// unused by this C version).
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // only used by the assert below
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      // Horizontal projection in 1/64-pel units.
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls left of the above row: use the left column.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
744
// Directional prediction, zone 3: 180 < angle < 270
// High-bitdepth counterpart of av1_dr_prediction_z3_c: column-by-column fill
// from the 'left' reference ('bd' is unused by this C version).
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  int r, c, y, base, shift, val;

  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;
  y = dy;
  for (c = 0; c < bw; ++c, y += dy) {
    base = y >> frac_bits;
    // 5-bit interpolation fraction derived from the 6-bit subpel position.
    shift = ((y << upsample_left) & 0x3F) >> 1;

    for (r = 0; r < bh; ++r, base += base_inc) {
      if (base < max_base_y) {
        val = left[base] * (32 - shift) + left[base + 1] * shift;
        dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Past the last reference sample: replicate it down the column.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
    }
  }
}
777
// Dispatches a high-bitdepth directional prediction to the zone
// implementation selected by the prediction angle (degrees):
//   (0, 90)    -> zone 1 (above row only)
//   (90, 180)  -> zone 2 (above row and left column)
//   (180, 270) -> zone 3 (left column only)
//   exactly 90 / 180 -> the plain vertical / horizontal predictors.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle > 0 && angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle > 90 && angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else if (angle > 180 && angle < 270) {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  } else if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
  } else if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
  }
}
803 #endif // CONFIG_AV1_HIGHBITDEPTH
804
805 DECLARE_ALIGNED(16, const int8_t,
806 av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
807 {
808 { -6, 10, 0, 0, 0, 12, 0, 0 },
809 { -5, 2, 10, 0, 0, 9, 0, 0 },
810 { -3, 1, 1, 10, 0, 7, 0, 0 },
811 { -3, 1, 1, 2, 10, 5, 0, 0 },
812 { -4, 6, 0, 0, 0, 2, 12, 0 },
813 { -3, 2, 6, 0, 0, 2, 9, 0 },
814 { -3, 2, 2, 6, 0, 2, 7, 0 },
815 { -3, 1, 2, 2, 6, 3, 5, 0 },
816 },
817 {
818 { -10, 16, 0, 0, 0, 10, 0, 0 },
819 { -6, 0, 16, 0, 0, 6, 0, 0 },
820 { -4, 0, 0, 16, 0, 4, 0, 0 },
821 { -2, 0, 0, 0, 16, 2, 0, 0 },
822 { -10, 16, 0, 0, 0, 0, 10, 0 },
823 { -6, 0, 16, 0, 0, 0, 6, 0 },
824 { -4, 0, 0, 16, 0, 0, 4, 0 },
825 { -2, 0, 0, 0, 16, 0, 2, 0 },
826 },
827 {
828 { -8, 8, 0, 0, 0, 16, 0, 0 },
829 { -8, 0, 8, 0, 0, 16, 0, 0 },
830 { -8, 0, 0, 8, 0, 16, 0, 0 },
831 { -8, 0, 0, 0, 8, 16, 0, 0 },
832 { -4, 4, 0, 0, 0, 0, 16, 0 },
833 { -4, 0, 4, 0, 0, 0, 16, 0 },
834 { -4, 0, 0, 4, 0, 0, 16, 0 },
835 { -4, 0, 0, 0, 4, 0, 16, 0 },
836 },
837 {
838 { -2, 8, 0, 0, 0, 10, 0, 0 },
839 { -1, 3, 8, 0, 0, 6, 0, 0 },
840 { -1, 2, 3, 8, 0, 4, 0, 0 },
841 { 0, 1, 2, 3, 8, 2, 0, 0 },
842 { -1, 4, 0, 0, 0, 3, 10, 0 },
843 { -1, 3, 4, 0, 0, 4, 6, 0 },
844 { -1, 2, 3, 4, 0, 4, 4, 0 },
845 { -1, 2, 2, 3, 4, 3, 3, 0 },
846 },
847 {
848 { -12, 14, 0, 0, 0, 14, 0, 0 },
849 { -10, 0, 14, 0, 0, 12, 0, 0 },
850 { -9, 0, 0, 14, 0, 11, 0, 0 },
851 { -8, 0, 0, 0, 14, 10, 0, 0 },
852 { -10, 12, 0, 0, 0, 0, 14, 0 },
853 { -9, 1, 12, 0, 0, 0, 12, 0 },
854 { -8, 0, 0, 12, 0, 1, 11, 0 },
855 { -7, 0, 0, 1, 12, 1, 9, 0 },
856 },
857 };
858
// Filter-intra prediction (8-bit). The block is predicted in 4x2 patches;
// each patch pixel is a weighted sum of 5 neighbours above it and 2 to its
// left, with weights taken from av1_filter_intra_taps[mode].
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  int r, c;
  // Working buffer: column 0 holds the left reference pixels, row 0 holds
  // the above reference pixels (including the top-left at [0][0]); the
  // interior is filled with the prediction as patches complete, so later
  // patches read previously predicted pixels as their neighbours.
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));

  for (r = 1; r < bh + 1; r += 2)
    for (c = 1; c < bw + 1; c += 4) {
      // The 7 neighbours of the current 4x2 patch: p0..p4 across the row
      // above, p5/p6 down the column to the left.
      const uint8_t p0 = buffer[r - 1][c - 1];
      const uint8_t p1 = buffer[r - 1][c];
      const uint8_t p2 = buffer[r - 1][c + 1];
      const uint8_t p3 = buffer[r - 1][c + 2];
      const uint8_t p4 = buffer[r - 1][c + 3];
      const uint8_t p5 = buffer[r][c - 1];
      const uint8_t p6 = buffer[r + 1][c - 1];
      for (int k = 0; k < 8; ++k) {
        int r_offset = k >> 2;
        int c_offset = k & 0x03;
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
                 av1_filter_intra_taps[mode][k][1] * p1 +
                 av1_filter_intra_taps[mode][k][2] * p2 +
                 av1_filter_intra_taps[mode][k][3] * p3 +
                 av1_filter_intra_taps[mode][k][4] * p4 +
                 av1_filter_intra_taps[mode][k][5] * p5 +
                 av1_filter_intra_taps[mode][k][6] * p6;
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[r + r_offset][c + c_offset] =
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
      }
    }

  // Copy the predicted interior (skipping the reference row/column) out.
  for (r = 0; r < bh; ++r) {
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
    dst += stride;
  }
}
905
906 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of av1_filter_intra_predictor_c: identical
// patch-based filter-intra algorithm, operating on uint16_t pixels and
// clipping to the given bit depth.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  int r, c;
  // Column 0 = left reference, row 0 = above reference (incl. top-left);
  // interior is filled patch by patch with predicted pixels.
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));

  for (r = 1; r < bh + 1; r += 2)
    for (c = 1; c < bw + 1; c += 4) {
      // 7 neighbours of the current 4x2 patch: p0..p4 above, p5/p6 left.
      const uint16_t p0 = buffer[r - 1][c - 1];
      const uint16_t p1 = buffer[r - 1][c];
      const uint16_t p2 = buffer[r - 1][c + 1];
      const uint16_t p3 = buffer[r - 1][c + 2];
      const uint16_t p4 = buffer[r - 1][c + 3];
      const uint16_t p5 = buffer[r][c - 1];
      const uint16_t p6 = buffer[r + 1][c - 1];
      for (int k = 0; k < 8; ++k) {
        int r_offset = k >> 2;
        int c_offset = k & 0x03;
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
                 av1_filter_intra_taps[mode][k][1] * p1 +
                 av1_filter_intra_taps[mode][k][2] * p2 +
                 av1_filter_intra_taps[mode][k][3] * p3 +
                 av1_filter_intra_taps[mode][k][4] * p4 +
                 av1_filter_intra_taps[mode][k][5] * p5 +
                 av1_filter_intra_taps[mode][k][6] * p6;
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
      }
    }

  for (r = 0; r < bh; ++r) {
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
    dst += stride;
  }
}
955 #endif // CONFIG_AV1_HIGHBITDEPTH
956
is_smooth(const MB_MODE_INFO * mbmi,int plane)957 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
958 if (plane == 0) {
959 const PREDICTION_MODE mode = mbmi->mode;
960 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
961 mode == SMOOTH_H_PRED);
962 } else {
963 // uv_mode is not set for inter blocks, so need to explicitly
964 // detect that case.
965 if (is_inter_block(mbmi)) return 0;
966
967 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
968 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
969 uv_mode == UV_SMOOTH_H_PRED);
970 }
971 }
972
get_intra_edge_filter_type(const MACROBLOCKD * xd,int plane)973 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
974 int ab_sm, le_sm;
975
976 if (plane == 0) {
977 const MB_MODE_INFO *ab = xd->above_mbmi;
978 const MB_MODE_INFO *le = xd->left_mbmi;
979 ab_sm = ab ? is_smooth(ab, plane) : 0;
980 le_sm = le ? is_smooth(le, plane) : 0;
981 } else {
982 const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
983 const MB_MODE_INFO *le = xd->chroma_left_mbmi;
984 ab_sm = ab ? is_smooth(ab, plane) : 0;
985 le_sm = le ? is_smooth(le, plane) : 0;
986 }
987
988 return (ab_sm || le_sm) ? 1 : 0;
989 }
990
// Returns the intra edge filter strength (0..3) for a block whose two
// relevant dimensions are bs0 and bs1, given the angular offset `delta`
// from the nearest exact direction and the filter `type` from
// get_intra_edge_filter_type(). Larger blocks and larger |delta| use
// stronger filtering; type 1 (smooth neighbours) filters more aggressively.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int d = abs(delta);
  int strength = 0;

  const int blk_wh = bs0 + bs1;
  if (type == 0) {
    if (blk_wh <= 8) {
      if (d >= 56) strength = 1;
    } else if (blk_wh <= 12) {
      if (d >= 40) strength = 1;
    } else if (blk_wh <= 16) {
      if (d >= 40) strength = 1;
    } else if (blk_wh <= 24) {
      if (d >= 8) strength = 1;
      if (d >= 16) strength = 2;
      if (d >= 32) strength = 3;
    } else if (blk_wh <= 32) {
      if (d >= 1) strength = 1;
      if (d >= 4) strength = 2;
      if (d >= 32) strength = 3;
    } else {
      if (d >= 1) strength = 3;
    }
  } else {
    if (blk_wh <= 8) {
      if (d >= 40) strength = 1;
      if (d >= 64) strength = 2;
    } else if (blk_wh <= 16) {
      if (d >= 20) strength = 1;
      if (d >= 48) strength = 2;
    } else if (blk_wh <= 24) {
      if (d >= 4) strength = 3;
    } else {
      if (d >= 1) strength = 3;
    }
  }
  return strength;
}
1029
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1030 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1031 if (!strength) return;
1032
1033 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1034 { 0, 5, 6, 5, 0 },
1035 { 2, 4, 4, 4, 2 } };
1036 const int filt = strength - 1;
1037 uint8_t edge[129];
1038
1039 memcpy(edge, p, sz * sizeof(*p));
1040 for (int i = 1; i < sz; i++) {
1041 int s = 0;
1042 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1043 int k = i - 2 + j;
1044 k = (k < 0) ? 0 : k;
1045 k = (k > sz - 1) ? sz - 1 : k;
1046 s += edge[k] * kernel[filt][j];
1047 }
1048 s = (s + 8) >> 4;
1049 p[i] = s;
1050 }
1051 }
1052
// Smooths the shared top-left corner pixel with a [5, 6, 5]/16 kernel over
// (left[0], corner, above[0]) and writes the result to both copies of the
// corner (p_above[-1] and p_left[-1]).
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int kernel[3] = { 5, 6, 5 };

  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
          (p_above[0] * kernel[2]);
  s = (s + 8) >> 4;  // Taps sum to 16; round to nearest.
  p_above[-1] = s;
  p_left[-1] = s;
}
1062
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1063 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1064 if (!strength) return;
1065
1066 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1067 { 0, 5, 6, 5, 0 },
1068 { 2, 4, 4, 4, 2 } };
1069 const int filt = strength - 1;
1070 uint16_t edge[129];
1071
1072 memcpy(edge, p, sz * sizeof(*p));
1073 for (int i = 1; i < sz; i++) {
1074 int s = 0;
1075 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1076 int k = i - 2 + j;
1077 k = (k < 0) ? 0 : k;
1078 k = (k > sz - 1) ? sz - 1 : k;
1079 s += edge[k] * kernel[filt][j];
1080 }
1081 s = (s + 8) >> 4;
1082 p[i] = s;
1083 }
1084 }
1085
1086 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of filter_intra_edge_corner: smooths the shared
// top-left corner with a [5, 6, 5]/16 kernel over (left[0], corner,
// above[0]) and stores the result in both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int kernel[3] = { 5, 6, 5 };

  int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
          (p_above[0] * kernel[2]);
  s = (s + 8) >> 4;  // Taps sum to 16; round to nearest.
  p_above[-1] = s;
  p_left[-1] = s;
}
1096 #endif
1097
av1_upsample_intra_edge_c(uint8_t * p,int sz)1098 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1099 // interpolate half-sample positions
1100 assert(sz <= MAX_UPSAMPLE_SZ);
1101
1102 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1103 // copy p[-1..(sz-1)] and extend first and last samples
1104 in[0] = p[-1];
1105 in[1] = p[-1];
1106 for (int i = 0; i < sz; i++) {
1107 in[i + 2] = p[i];
1108 }
1109 in[sz + 2] = p[sz - 1];
1110
1111 // interpolate half-sample edge positions
1112 p[-2] = in[0];
1113 for (int i = 0; i < sz; i++) {
1114 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1115 s = clip_pixel((s + 8) >> 4);
1116 p[2 * i - 1] = s;
1117 p[2 * i] = in[i + 2];
1118 }
1119 }
1120
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1121 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1122 // interpolate half-sample positions
1123 assert(sz <= MAX_UPSAMPLE_SZ);
1124
1125 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1126 // copy p[-1..(sz-1)] and extend first and last samples
1127 in[0] = p[-1];
1128 in[1] = p[-1];
1129 for (int i = 0; i < sz; i++) {
1130 in[i + 2] = p[i];
1131 }
1132 in[sz + 2] = p[sz - 1];
1133
1134 // interpolate half-sample edge positions
1135 p[-2] = in[0];
1136 for (int i = 0; i < sz; i++) {
1137 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1138 s = (s + 8) >> 4;
1139 s = clip_pixel_highbd(s, bd);
1140 p[2 * i - 1] = s;
1141 p[2 * i] = in[i + 2];
1142 }
1143 }
1144 #if CONFIG_AV1_HIGHBITDEPTH
// Builds the above/left reference arrays for one high-bitdepth transform
// block and runs the selected intra predictor into dst8.
//
// ref8/dst8 alias uint16_t buffers (CONVERT_TO_SHORTPTR). The n_*_px
// arguments give how many reference pixels are actually available in each
// direction (0 if none); unavailable positions are synthesized by
// replicating the nearest available pixel or by bit-depth-scaled defaults.
// For directional modes the edges may additionally be filtered and/or 2x
// upsampled before prediction, unless disable_edge_filter is set.
static void build_intra_predictors_high(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
    int bit_depth) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so the predictors can read a few pixels before index 0
  // (e.g. the top-left corner at above_row[-1]).
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // "Mid-gray" for this bit depth; the defaults below are base-1 / base+1.
  int base = 128 << (bit_depth - 8);
  // The left_data, above_data buffers must be zeroed to fix some intermittent
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
  // seen to be the potential reason for this issue.
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    // Directional modes override the per-mode neighbour requirements based
    // on which zone the final angle falls into.
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Degenerate case: a needed edge has no reference pixels at all and the
  // other edge is unused -> the whole block is a single default value.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Extend by replicating the last available left pixel.
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      // No left pixels: fall back to the first above pixel.
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      // Extend by replicating the last available above pixel.
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      // No above pixels: fall back to the first left pixel.
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Top-left corner: use the real corner if both edges exist, otherwise the
  // nearest available pixel, otherwise mid-gray.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, bit_depth);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Exact vertical/horizontal angles skip the edge filter entirely.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, bit_depth);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // DC has per-availability variants (left-only / top-only / both).
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, bit_depth);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
  }
}
1326 #endif // CONFIG_AV1_HIGHBITDEPTH
1327
// Builds the above/left reference arrays for one 8-bit transform block and
// runs the selected intra predictor into dst. 8-bit counterpart of
// build_intra_predictors_high: unavailable reference pixels are synthesized
// by replicating the nearest available pixel or by the defaults 127/128/129
// shown below, and directional modes may filter and/or 2x upsample the
// edges before prediction.
static void build_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so the predictors can read a few pixels before index 0
  // (e.g. the top-left corner at above_row[-1]).
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // The left_data, above_data buffers must be zeroed to fix some intermittent
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
  // be the potential reason for this issue.
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    // Directional modes override the per-mode neighbour requirements based
    // on which zone the final angle falls into.
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Degenerate case: a needed edge has no reference pixels at all and the
  // other edge is unused -> the whole block is a single default value.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Extend by replicating the last available left pixel.
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      // No left pixels: fall back to the first above pixel.
      memset(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      // Extend by replicating the last available above pixel.
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      // No above pixels: fall back to the first left pixel.
      memset(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Top-left corner: use the real corner if both edges exist, otherwise the
  // nearest available pixel, otherwise 128.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      // Exact vertical/horizontal angles skip the edge filter entirely.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength = intra_edge_filter_strength(
              txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                   intra_edge_filter_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                  intra_edge_filter_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // DC has per-availability variants (left-only / top-only / both).
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1504
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1505 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1506 int subsampling_y) {
1507 assert(subsampling_x >= 0 && subsampling_x < 2);
1508 assert(subsampling_y >= 0 && subsampling_y < 2);
1509 BLOCK_SIZE bs = bsize;
1510 switch (bsize) {
1511 case BLOCK_4X4:
1512 if (subsampling_x == 1 && subsampling_y == 1)
1513 bs = BLOCK_8X8;
1514 else if (subsampling_x == 1)
1515 bs = BLOCK_8X4;
1516 else if (subsampling_y == 1)
1517 bs = BLOCK_4X8;
1518 break;
1519 case BLOCK_4X8:
1520 if (subsampling_x == 1 && subsampling_y == 1)
1521 bs = BLOCK_8X8;
1522 else if (subsampling_x == 1)
1523 bs = BLOCK_8X8;
1524 else if (subsampling_y == 1)
1525 bs = BLOCK_4X8;
1526 break;
1527 case BLOCK_8X4:
1528 if (subsampling_x == 1 && subsampling_y == 1)
1529 bs = BLOCK_8X8;
1530 else if (subsampling_x == 1)
1531 bs = BLOCK_8X4;
1532 else if (subsampling_y == 1)
1533 bs = BLOCK_8X8;
1534 break;
1535 case BLOCK_4X16:
1536 if (subsampling_x == 1 && subsampling_y == 1)
1537 bs = BLOCK_8X16;
1538 else if (subsampling_x == 1)
1539 bs = BLOCK_8X16;
1540 else if (subsampling_y == 1)
1541 bs = BLOCK_4X16;
1542 break;
1543 case BLOCK_16X4:
1544 if (subsampling_x == 1 && subsampling_y == 1)
1545 bs = BLOCK_16X8;
1546 else if (subsampling_x == 1)
1547 bs = BLOCK_16X4;
1548 else if (subsampling_y == 1)
1549 bs = BLOCK_16X8;
1550 break;
1551 default: break;
1552 }
1553 return bs;
1554 }
1555
// Computes the intra prediction for one transform block of the given plane
// into dst. Palette blocks are filled directly from the colour index map;
// otherwise the function determines how many above / left / top-right /
// bottom-left reference pixels are available (respecting frame, tile and
// partition boundaries) and dispatches to build_intra_predictors (8-bit)
// or build_intra_predictors_high (high bit depth).
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
                             int enable_intra_edge_filter, int wpx, int hpx,
                             TX_SIZE tx_size, PREDICTION_MODE mode,
                             int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode,
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
                             int dst_stride, int col_off, int row_off,
                             int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel offset of this transform block inside the prediction block.
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    // Palette mode: look each pixel up in the colour index map; no
    // reference-pixel handling is needed.
    int r, c;
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
                               xd->color_index_map_offset[plane != 0];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (r = 0; r < txhpx; ++r) {
        for (c = 0; c < txwpx; ++c) {
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
        }
      }
    } else {
      for (r = 0; r < txhpx; ++r) {
        for (c = 0; c < txwpx; ++c) {
          dst[r * dst_stride + c] =
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  // A non-zero row/col offset means the neighbour is inside this prediction
  // block and therefore always available.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  BLOCK_SIZE bsize = mbmi->bsize;
  // force 4x4 chroma component block size.
  if (ss_x || ss_y) {
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
  }

  const int have_top_right =
      has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left = has_bottom_left(
      sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
      tx_size, row_off, col_off, ss_x, ss_y);

  const int disable_edge_filter = !enable_intra_edge_filter;
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(
        ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
        tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
        have_top_right ? AOMMIN(txwpx, xr) : 0,
        have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
        have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
        xd->bd);
    return;
  }
#endif
  build_intra_predictors(
      ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
      tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
      have_top_right ? AOMMIN(txwpx, xr) : 0,
      have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
      have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
}
1653
/*
 * Predicts one intra transform block in place into the plane's current
 * reconstruction buffer (pd->dst), dispatching between the generic intra
 * path and the chroma-from-luma (CfL) path for chroma blocks coded with
 * UV_CFL_PRED.
 *
 * cm       - common frame-level state (sequence header is read from it).
 * xd       - per-macroblock decode state; xd->mi[0] supplies the block's
 *            mode info, xd->cfl the CfL context.
 * plane    - AOM_PLANE_Y / U / V.
 * blk_col,
 * blk_row  - position of the transform block inside the prediction block,
 *            in MI (4x4 pixel) units.
 * tx_size  - transform size of the block being predicted.
 */
av1_predict_intra_block_facade(const AV1_COMMON * cm,MACROBLOCKD * xd,int plane,int blk_col,int blk_row,TX_SIZE tx_size)1654 void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1655 int plane, int blk_col, int blk_row,
1656 TX_SIZE tx_size) {
1657 const MB_MODE_INFO *const mbmi = xd->mi[0];
1658 struct macroblockd_plane *const pd = &xd->plane[plane];
1659 const int dst_stride = pd->dst.stride;
  // Destination pixels: blk_col/blk_row are in MI units, so scale by
  // MI_SIZE_LOG2 to get a pixel offset into the plane buffer.
1660 uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
  // Luma uses the luma mode directly; chroma maps its UV mode onto the
  // corresponding luma prediction mode.
1661 const PREDICTION_MODE mode =
1662 (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  // palette_size index 0 is luma, index 1 is chroma (both chroma planes).
1663 const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  // Filter-intra is signalled for luma only; FILTER_INTRA_MODES means
  // "filter intra disabled" for the predictors below.
1664 const FILTER_INTRA_MODE filter_intra_mode =
1665 (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1666 ? mbmi->filter_intra_mode_info.filter_intra_mode
1667 : FILTER_INTRA_MODES;
  // Directional-mode angle offset, stored in steps of ANGLE_STEP.
1668 const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1669 const SequenceHeader *seq_params = cm->seq_params;
1670
  // CfL path: only taken for chroma planes coded with UV_CFL_PRED.
1671 if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1672 #if CONFIG_DEBUG
1673 assert(is_cfl_allowed(xd));
1674 const BLOCK_SIZE plane_bsize =
1675 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1676 (void)plane_bsize;
1677 assert(plane_bsize < BLOCK_SIZES_ALL);
  // Outside lossless mode a CfL chroma block must coincide with a single
  // transform block at the top-left of the prediction block.
1678 if (!xd->lossless[mbmi->segment_id]) {
1679 assert(blk_col == 0);
1680 assert(blk_row == 0);
1681 assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1682 assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1683 }
1684 #endif
1685 CFL_CTX *const cfl = &xd->cfl;
1686 CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  // Compute the DC (regular intra) part of the CfL prediction, or reuse a
  // previously cached copy for this chroma plane when available.
1687 if (cfl->dc_pred_is_cached[pred_plane] == 0) {
1688 av1_predict_intra_block(xd, seq_params->sb_size,
1689 seq_params->enable_intra_edge_filter, pd->width,
1690 pd->height, tx_size, mode, angle_delta,
1691 use_palette, filter_intra_mode, dst, dst_stride,
1692 dst, dst_stride, blk_col, blk_row, plane);
  // Populate the cache on first use so subsequent calls can skip the
  // intra prediction above.
1693 if (cfl->use_dc_pred_cache) {
1694 cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1695 cfl->dc_pred_is_cached[pred_plane] = 1;
1696 }
1697 } else {
1698 cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1699 }
  // Apply the chroma-from-luma model on top of the DC prediction already
  // written to dst (presumably adds the scaled luma AC contribution).
1700 cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1701 return;
1702 }
  // Non-CfL path: regular intra prediction, reading reference pixels from
  // and writing the prediction into the same dst buffer.
1703 av1_predict_intra_block(
1704 xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1705 pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1706 dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1707 }
1708
av1_init_intra_predictors(void)1709 void av1_init_intra_predictors(void) {
1710 aom_once(init_intra_predictors_internal);
1711 }
1712