1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14
15 #include "config/aom_config.h"
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/av1_rtcd.h"
18
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/aom_once.h"
22 #include "aom_ports/mem.h"
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/cfl.h"
25 #include "av1/common/reconintra.h"
26
// Bit flags describing which neighbouring reference-pixel groups an intra
// prediction mode reads (combined per mode in extend_modes below).
// The flags start at bit 1; bit 0 is left unused.
enum {
  NEED_LEFT = 1 << 1,
  NEED_ABOVE = 1 << 2,
  NEED_ABOVERIGHT = 1 << 3,
  NEED_ABOVELEFT = 1 << 4,
  NEED_BOTTOMLEFT = 1 << 5,
};

// NOTE(review): the three constants below parameterize the intra edge
// filter/upsample logic defined later in this file -- semantics inferred
// from the names; confirm against their use sites.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
// Size of the scratch arrays that hold the above-row / left-column
// reference pixels for a prediction block.
#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39
// For each intra prediction mode (indexed in INTRA_MODES order, see the
// trailing comments), the NEED_* flags describing which neighbouring
// reference-pixel groups that mode requires.
static const uint8_t extend_modes[INTRA_MODES] = {
  NEED_ABOVE | NEED_LEFT,                   // DC
  NEED_ABOVE,                               // V
  NEED_LEFT,                                // H
  NEED_ABOVE | NEED_ABOVERIGHT,             // D45
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
  NEED_LEFT | NEED_BOTTOMLEFT,              // D203
  NEED_ABOVE | NEED_ABOVERIGHT,             // D67
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
  NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
};
55
56 // Tables to store if the top-right reference pixels are available. The flags
57 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
58 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
59 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
60 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
61 // . . . .
62 // . . . .
63 // . . o .
64 // . . . .
// Read-only lookup data (see the comment above for the packing scheme):
// declared const so the tables live in read-only storage and cannot be
// modified by accident.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
128
129 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130 // 4X4
131 has_tr_4x4,
132 // 4X8, 8X4, 8X8
133 has_tr_4x8, has_tr_8x4, has_tr_8x8,
134 // 8X16, 16X8, 16X16
135 has_tr_8x16, has_tr_16x8, has_tr_16x16,
136 // 16X32, 32X16, 32X32
137 has_tr_16x32, has_tr_32x16, has_tr_32x32,
138 // 32X64, 64X32, 64X64
139 has_tr_32x64, has_tr_64x32, has_tr_64x64,
140 // 64x128, 128x64, 128x128
141 has_tr_64x128, has_tr_128x64, has_tr_128x128,
142 // 4x16, 16x4, 8x32
143 has_tr_4x16, has_tr_16x4, has_tr_8x32,
144 // 32x8, 16x64, 64x16
145 has_tr_32x8, has_tr_16x64, has_tr_64x16
146 };
147
// Read-only availability data for the mixed-vertical-partition visiting
// order (see the comment below): declared const like the tables above.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
157
158 // The _vert_* tables are like the ordinary tables above, but describe the
159 // order we visit square blocks when doing a PARTITION_VERT_A or
160 // PARTITION_VERT_B. This is the same order as normal except for on the last
161 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162 // as a pair of squares, which means that these tables work correctly for both
163 // mixed vertical partition types.
164 //
165 // There are tables for each of the square sizes. Vertical rectangles (like
166 // BLOCK_16X32) use their respective "non-vert" table
167 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168 // 4X4
169 NULL,
170 // 4X8, 8X4, 8X8
171 has_tr_4x8, NULL, has_tr_vert_8x8,
172 // 8X16, 16X8, 16X16
173 has_tr_8x16, NULL, has_tr_vert_16x16,
174 // 16X32, 32X16, 32X32
175 has_tr_16x32, NULL, has_tr_vert_32x32,
176 // 32X64, 64X32, 64X64
177 has_tr_32x64, NULL, has_tr_vert_64x64,
178 // 64x128, 128x64, 128x128
179 has_tr_64x128, NULL, has_tr_128x128
180 };
181
// Selects the packed top-right availability table for this partition type
// and block size. Mixed vertical partitions (VERT_A/VERT_B) visit square
// sub-blocks in a different order and use the dedicated "vert" tables.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int use_vert_table =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  const uint8_t *table;
  if (use_vert_table) {
    assert(bsize < BLOCK_SIZES);
    table = has_tr_vert_tables[bsize];
  } else {
    table = has_tr_tables[bsize];
  }
  assert(table != NULL);
  return table;
}
195
// Returns 1 if the transform block whose top-left corner is at tx-unit
// offset (row_off, col_off) within the current prediction block has its
// top-right reference pixels available, 0 otherwise.
// sb_size/bsize are the superblock and block sizes, (mi_row, mi_col) locate
// the block in mode-info units, and ss_x/ss_y are the plane's subsampling
// shifts. txsz is the transform size being predicted.
static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!top_available || !right_available) return 0;

  const int bw_unit = mi_size_wide[bsize];
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
  const int top_right_count_unit = tx_size_wide_unit[txsz];

  if (row_off > 0) {  // Just need to check if enough pixels on the right.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      // Special case: For 128x128 blocks, the transform unit whose
      // top-right corner is at the center of the block does in fact have
      // pixels available at its top-right corner.
      if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
          col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
        return 1;
      }
      // Otherwise treat the 128-wide block as 64-wide halves coded in
      // raster order and test within the containing 64x* half.
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
      const int col_off_64 = col_off % plane_bw_unit_64;
      return col_off_64 + top_right_count_unit < plane_bw_unit_64;
    }
    return col_off + top_right_count_unit < plane_bw_unit;
  } else {
    // All top-right pixels are in the block above, which is already available.
    if (col_off + top_right_count_unit < plane_bw_unit) return 1;

    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    const int sb_mi_size = mi_size_high[sb_size];
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;

    // Top row of superblock: so top-right pixels are in the top and/or
    // top-right superblocks, both of which are already available.
    if (blk_row_in_sb == 0) return 1;

    // Rightmost column of superblock (and not the top row): so top-right pixels
    // fall in the right superblock, which is not available yet.
    if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
      return 0;
    }

    // General case (neither top row nor rightmost column): check if the
    // top-right block is coded before the current block.
    // Raster index of this block within the superblock, used as a bit
    // position into the packed has_tr table (byte idx1, bit idx2).
    const int this_blk_index =
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
        blk_col_in_sb + 0;
    const int idx1 = this_blk_index / 8;
    const int idx2 = this_blk_index % 8;
    const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
    return (has_tr_table[idx1] >> idx2) & 1;
  }
}
251
252 // Similar to the has_tr_* tables, but store if the bottom-left reference
253 // pixels are available.
// Read-only lookup data (bottom-left availability, packed like the has_tr_*
// tables): declared const so the tables live in read-only storage.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
313
314 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315 // 4X4
316 has_bl_4x4,
317 // 4X8, 8X4, 8X8
318 has_bl_4x8, has_bl_8x4, has_bl_8x8,
319 // 8X16, 16X8, 16X16
320 has_bl_8x16, has_bl_16x8, has_bl_16x16,
321 // 16X32, 32X16, 32X32
322 has_bl_16x32, has_bl_32x16, has_bl_32x32,
323 // 32X64, 64X32, 64X64
324 has_bl_32x64, has_bl_64x32, has_bl_64x64,
325 // 64x128, 128x64, 128x128
326 has_bl_64x128, has_bl_128x64, has_bl_128x128,
327 // 4x16, 16x4, 8x32
328 has_bl_4x16, has_bl_16x4, has_bl_8x32,
329 // 32x8, 16x64, 64x16
330 has_bl_32x8, has_bl_16x64, has_bl_64x16
331 };
332
// Read-only availability data for the mixed-vertical-partition visiting
// order (see the comment below): declared const like the tables above.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
342
343 // The _vert_* tables are like the ordinary tables above, but describe the
344 // order we visit square blocks when doing a PARTITION_VERT_A or
345 // PARTITION_VERT_B. This is the same order as normal except for on the last
346 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347 // as a pair of squares, which means that these tables work correctly for both
348 // mixed vertical partition types.
349 //
350 // There are tables for each of the square sizes. Vertical rectangles (like
351 // BLOCK_16X32) use their respective "non-vert" table
352 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353 // 4X4
354 NULL,
355 // 4X8, 8X4, 8X8
356 has_bl_4x8, NULL, has_bl_vert_8x8,
357 // 8X16, 16X8, 16X16
358 has_bl_8x16, NULL, has_bl_vert_16x16,
359 // 16X32, 32X16, 32X32
360 has_bl_16x32, NULL, has_bl_vert_32x32,
361 // 32X64, 64X32, 64X64
362 has_bl_32x64, NULL, has_bl_vert_64x64,
363 // 64x128, 128x64, 128x128
364 has_bl_64x128, NULL, has_bl_128x128
365 };
366
// Selects the packed bottom-left availability table for this partition type
// and block size. Mixed vertical partitions (VERT_A/VERT_B) visit square
// sub-blocks in a different order and use the dedicated "vert" tables.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int use_vert_table =
      partition == PARTITION_VERT_A || partition == PARTITION_VERT_B;
  const uint8_t *table;
  if (use_vert_table) {
    assert(bsize < BLOCK_SIZES);
    table = has_bl_vert_tables[bsize];
  } else {
    table = has_bl_tables[bsize];
  }
  assert(table != NULL);
  return table;
}
380
// Returns 1 if the transform block whose top-left corner is at tx-unit
// offset (row_off, col_off) within the current prediction block has its
// bottom-left reference pixels available, 0 otherwise.
// sb_size/bsize are the superblock and block sizes, (mi_row, mi_col) locate
// the block in mode-info units, and ss_x/ss_y are the plane's subsampling
// shifts. txsz is the transform size being predicted.
static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!bottom_available || !left_available) return 0;

  // Special case for 128x* blocks, when col_off is half the block width.
  // This is needed because 128x* superblocks are divided into 64x* blocks in
  // raster order.
  if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int col_off_64 = col_off % plane_bw_unit_64;
    if (col_off_64 == 0) {
      // We are at the left edge of top-right or bottom-right 64x* block.
      const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
      const int row_off_64 = row_off % plane_bh_unit_64;
      const int plane_bh_unit =
          AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
      // Check if all bottom-left pixels are in the left 64x* block (which is
      // already coded).
      return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
    }
  }

  if (col_off > 0) {
    // Bottom-left pixels are in the bottom-left block, which is not available.
    return 0;
  } else {
    const int bh_unit = mi_size_high[bsize];
    const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
    const int bottom_left_count_unit = tx_size_high_unit[txsz];

    // All bottom-left pixels are in the left block, which is already available.
    if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;

    const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
    const int bh_in_mi_log2 = mi_size_high_log2[bsize];
    const int sb_mi_size = mi_size_high[sb_size];
    const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
    const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;

    // Leftmost column of superblock: so bottom-left pixels may be in the left
    // and/or bottom-left superblocks. But only the left superblock is
    // available, so check if all required pixels fall in that superblock.
    if (blk_col_in_sb == 0) {
      // Simplified from
      //   blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2)
      // where the "+ MI_SIZE_LOG2 - MI_SIZE_LOG2" term was a no-op.
      const int blk_start_row_off = (blk_row_in_sb << bh_in_mi_log2) >> ss_y;
      const int row_off_in_sb = blk_start_row_off + row_off;
      const int sb_height_unit = sb_mi_size >> ss_y;
      return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
    }

    // Bottom row of superblock (and not the leftmost column): so bottom-left
    // pixels fall in the bottom superblock, which is not available yet.
    if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;

    // General case (neither leftmost column nor bottom row): check if the
    // bottom-left block is coded before the current block.
    // Raster index of this block within the superblock, used as a bit
    // position into the packed has_bl table (byte idx1, bit idx2).
    const int this_blk_index =
        ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
        blk_col_in_sb + 0;
    const int idx1 = this_blk_index / 8;
    const int idx2 = this_blk_index % 8;
    const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
    return (has_bl_table[idx1] >> idx2) & 1;
  }
}
449
// Signature of an 8-bit block intra predictor: writes the prediction into
// dst (row stride `stride`) from the `above` row and `left` column
// reference pixels.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// Predictor dispatch tables, filled in by init_intra_predictors_internal().
// pred[mode][tx_size] covers the non-DC modes. dc_pred[have_left][have_top]
// selects among the DC variants: [0][0] = dc_128, [0][1] = dc_top,
// [1][0] = dc_left, [1][1] = dc.
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth equivalents; `bd` is the bit depth.
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
463
init_intra_predictors_internal(void)464 static void init_intra_predictors_internal(void) {
465 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466
467 #define INIT_RECTANGULAR(p, type) \
468 p[TX_4X8] = aom_##type##_predictor_4x8; \
469 p[TX_8X4] = aom_##type##_predictor_8x4; \
470 p[TX_8X16] = aom_##type##_predictor_8x16; \
471 p[TX_16X8] = aom_##type##_predictor_16x8; \
472 p[TX_16X32] = aom_##type##_predictor_16x32; \
473 p[TX_32X16] = aom_##type##_predictor_32x16; \
474 p[TX_32X64] = aom_##type##_predictor_32x64; \
475 p[TX_64X32] = aom_##type##_predictor_64x32; \
476 p[TX_4X16] = aom_##type##_predictor_4x16; \
477 p[TX_16X4] = aom_##type##_predictor_16x4; \
478 p[TX_8X32] = aom_##type##_predictor_8x32; \
479 p[TX_32X8] = aom_##type##_predictor_32x8; \
480 p[TX_16X64] = aom_##type##_predictor_16x64; \
481 p[TX_64X16] = aom_##type##_predictor_64x16;
482
483 #define INIT_NO_4X4(p, type) \
484 p[TX_8X8] = aom_##type##_predictor_8x8; \
485 p[TX_16X16] = aom_##type##_predictor_16x16; \
486 p[TX_32X32] = aom_##type##_predictor_32x32; \
487 p[TX_64X64] = aom_##type##_predictor_64x64; \
488 INIT_RECTANGULAR(p, type)
489
490 #define INIT_ALL_SIZES(p, type) \
491 p[TX_4X4] = aom_##type##_predictor_4x4; \
492 INIT_NO_4X4(p, type)
493
494 INIT_ALL_SIZES(pred[V_PRED], v)
495 INIT_ALL_SIZES(pred[H_PRED], h)
496 INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
497 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
498 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
499 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
500 INIT_ALL_SIZES(dc_pred[0][0], dc_128)
501 INIT_ALL_SIZES(dc_pred[0][1], dc_top)
502 INIT_ALL_SIZES(dc_pred[1][0], dc_left)
503 INIT_ALL_SIZES(dc_pred[1][1], dc)
504 #if CONFIG_AV1_HIGHBITDEPTH
505 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
506 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
507 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
508 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
509 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
510 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
511 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
512 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
513 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
514 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
515 #endif
516 #undef intra_pred_allsizes
517 }
518
// Directional prediction, zone 1: 0 < angle < 90.
// Builds the bw x bh prediction row by row from the `above` reference row;
// positions are tracked in 1/64-pel units (6 fractional bits, reduced by
// upsample_above), and each pixel is a 5-bit-weight blend of two
// neighbouring reference pixels.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  int x = dx;
  for (int r = 0; r < bh; ++r, dst += stride, x += dx) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // Past the last usable reference pixel: every remaining row is a
      // solid fill of the rightmost reference value.
      for (int i = r; i < bh; ++i) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
556
// Directional prediction, zone 2: 90 < angle < 180
// Each output pixel is projected back along the prediction angle; if the
// projection lands on or right of the above row's usable range it is
// interpolated from `above`, otherwise from the `left` column.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  // Leftmost usable index into the above (x) / left (y) reference arrays.
  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  // Positions use 6 fractional bits (1/64 pel), reduced by upsampling.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      // Project (r, c) onto the above row; x is in 1/64-pel units.
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Blend two above-row reference pixels with 5-bit weights.
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls left of the above row: use the left column.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
595
// Directional prediction, zone 3: 180 < angle < 270.
// Builds the bw x bh prediction column by column from the `left` reference
// column; positions are tracked in 1/64-pel units (6 fractional bits,
// reduced by upsample_left), blending neighbouring reference pixels with
// 5-bit weights.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;
  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;

    for (int r = 0; r < bh; ++r, base += base_inc) {
      if (base >= max_base_y) {
        // Ran past the reference column: replicate its last pixel down the
        // rest of this output column.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
627
// Dispatches a directional prediction for `angle` in (0, 270): the exact
// vertical (90) and horizontal (180) angles use the ordinary V/H
// predictors, otherwise the angle selects one of the three zone kernels.
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
652
653 #if CONFIG_AV1_HIGHBITDEPTH
// Directional prediction, zone 1: 0 < angle < 90.
// High-bitdepth variant of av1_dr_prediction_z1_c: builds the prediction
// row by row from the `above` reference row in 1/64-pel units, blending
// neighbouring reference pixels with 5-bit weights.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;
  int x = dx;
  for (int r = 0; r < bh; ++r, dst += stride, x += dx) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // Past the last usable reference pixel: every remaining row is a
      // solid fill of the rightmost reference value.
      for (int i = r; i < bh; ++i) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c, base += base_inc) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
    }
  }
}
693
// Directional prediction, zone 2: 90 < angle < 180
// High-bitdepth variant of av1_dr_prediction_z2_c: each output pixel is
// projected back along the prediction angle and interpolated from `above`
// when the projection lands in the above row's usable range, otherwise
// from the `left` column.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  // Leftmost usable index into the above (x) / left (y) reference arrays.
  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  // Positions use 6 fractional bits (1/64 pel), reduced by upsampling.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      int val;
      // Project (r, c) onto the above row; x is in 1/64-pel units.
      int y = r + 1;
      int x = (c << 6) - y * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        // Blend two above-row reference pixels with 5-bit weights.
        const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
        val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      } else {
        // Projection falls left of the above row: use the left column.
        x = c + 1;
        y = (r << 6) - x * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
        val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
        val = ROUND_POWER_OF_TWO(val, 5);
      }
      dst[c] = val;
    }
    dst += stride;
  }
}
733
// Directional prediction, zone 3: 180 < angle < 270.
// High-bitdepth variant of av1_dr_prediction_z3_c: builds the prediction
// column by column from the `left` reference column in 1/64-pel units,
// blending neighbouring reference pixels with 5-bit weights.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;
  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;

    for (int r = 0; r < bh; ++r, base += base_inc) {
      if (base >= max_base_y) {
        // Ran past the reference column: replicate its last pixel down the
        // rest of this output column.
        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
        break;
      }
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
766
// High-bitdepth directional prediction dispatch for `angle` in (0, 270):
// the exact vertical (90) and horizontal (180) angles use the ordinary V/H
// predictors, otherwise the angle selects one of the three zone kernels.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle > 0 && angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle > 90 && angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else if (angle > 180 && angle < 270) {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
792 #endif // CONFIG_AV1_HIGHBITDEPTH
793
// Filter-intra tap tables: one 8x8 matrix of signed taps per filter-intra
// mode (FILTER_INTRA_MODES of them), 16-byte aligned for SIMD loads.
// NOTE(review): each row appears to hold the taps used to compute one of
// the 8 output pixels of a patch from its neighbouring reference pixels --
// confirm against av1_filter_intra_predictor_c below.
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
847
// Computes the recursive filter-intra prediction for a block of up to 32x32.
// The block is processed in 4-wide x 2-high patches; each patch is predicted
// from seven already-reconstructed neighbours (the row above it and the
// column to its left), so earlier patches feed later ones — the iteration
// order of the r/c loops is essential.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  int r, c;
  // Working area: row 0 holds above[-1..bw-1], column 0 holds left[0..bh-1],
  // and the interior [1..bh][1..bw] is filled patch by patch.
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // Seed the left column and the top row (including the above-left sample).
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));

  for (r = 1; r < bh + 1; r += 2)
    for (c = 1; c < bw + 1; c += 4) {
      // p0..p4: the five samples directly above the 4x2 patch (including the
      // above-left corner); p5/p6: the two samples to its left.
      const uint8_t p0 = buffer[r - 1][c - 1];
      const uint8_t p1 = buffer[r - 1][c];
      const uint8_t p2 = buffer[r - 1][c + 1];
      const uint8_t p3 = buffer[r - 1][c + 2];
      const uint8_t p4 = buffer[r - 1][c + 3];
      const uint8_t p5 = buffer[r][c - 1];
      const uint8_t p6 = buffer[r + 1][c - 1];
      // k indexes the 8 output pixels of the patch in raster order.
      for (int k = 0; k < 8; ++k) {
        int r_offset = k >> 2;
        int c_offset = k & 0x03;
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
                 av1_filter_intra_taps[mode][k][1] * p1 +
                 av1_filter_intra_taps[mode][k][2] * p2 +
                 av1_filter_intra_taps[mode][k][3] * p3 +
                 av1_filter_intra_taps[mode][k][4] * p4 +
                 av1_filter_intra_taps[mode][k][5] * p5 +
                 av1_filter_intra_taps[mode][k][6] * p6;
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[r + r_offset][c + c_offset] =
            clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
      }
    }

  // Copy the reconstructed interior (excluding the reference row/column) out.
  for (r = 0; r < bh; ++r) {
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
    dst += stride;
  }
}
894
895 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of av1_filter_intra_predictor_c: recursive
// filter-intra prediction in 4x2 patches, where each patch is predicted from
// seven already-reconstructed neighbours. Iteration order is essential since
// earlier patches feed later ones.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  int r, c;
  // Working area: row 0 holds above[-1..bw-1], column 0 holds left[0..bh-1],
  // and the interior [1..bh][1..bw] is filled patch by patch.
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // Seed the left column and the top row (including the above-left sample).
  for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));

  for (r = 1; r < bh + 1; r += 2)
    for (c = 1; c < bw + 1; c += 4) {
      // p0..p4: the five samples directly above the 4x2 patch (including the
      // above-left corner); p5/p6: the two samples to its left.
      const uint16_t p0 = buffer[r - 1][c - 1];
      const uint16_t p1 = buffer[r - 1][c];
      const uint16_t p2 = buffer[r - 1][c + 1];
      const uint16_t p3 = buffer[r - 1][c + 2];
      const uint16_t p4 = buffer[r - 1][c + 3];
      const uint16_t p5 = buffer[r][c - 1];
      const uint16_t p6 = buffer[r + 1][c - 1];
      // k indexes the 8 output pixels of the patch in raster order.
      for (int k = 0; k < 8; ++k) {
        int r_offset = k >> 2;
        int c_offset = k & 0x03;
        int pr = av1_filter_intra_taps[mode][k][0] * p0 +
                 av1_filter_intra_taps[mode][k][1] * p1 +
                 av1_filter_intra_taps[mode][k][2] * p2 +
                 av1_filter_intra_taps[mode][k][3] * p3 +
                 av1_filter_intra_taps[mode][k][4] * p4 +
                 av1_filter_intra_taps[mode][k][5] * p5 +
                 av1_filter_intra_taps[mode][k][6] * p6;
        // Section 7.11.2.3 specifies the right-hand side of the assignment as
        // Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
        // Since Clip1() clips a negative value to 0, it is safe to replace
        // Round2Signed() with Round2().
        buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
      }
    }

  // Copy the reconstructed interior (excluding the reference row/column) out.
  for (r = 0; r < bh; ++r) {
    memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
    dst += stride;
  }
}
944 #endif // CONFIG_AV1_HIGHBITDEPTH
945
is_smooth(const MB_MODE_INFO * mbmi,int plane)946 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
947 if (plane == 0) {
948 const PREDICTION_MODE mode = mbmi->mode;
949 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
950 mode == SMOOTH_H_PRED);
951 } else {
952 // uv_mode is not set for inter blocks, so need to explicitly
953 // detect that case.
954 if (is_inter_block(mbmi)) return 0;
955
956 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
957 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
958 uv_mode == UV_SMOOTH_H_PRED);
959 }
960 }
961
get_intra_edge_filter_type(const MACROBLOCKD * xd,int plane)962 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
963 const MB_MODE_INFO *above;
964 const MB_MODE_INFO *left;
965
966 if (plane == 0) {
967 above = xd->above_mbmi;
968 left = xd->left_mbmi;
969 } else {
970 above = xd->chroma_above_mbmi;
971 left = xd->chroma_left_mbmi;
972 }
973
974 return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
975 }
976
// Maps the block dimensions (bs0 + bs1), the angle delta from the nearest
// pure direction, and the filter type (see get_intra_edge_filter_type) to an
// edge filter strength in 0..3 (0 = no filtering).
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int d = abs(delta);
  const int blk_wh = bs0 + bs1;

  if (type == 0) {
    if (blk_wh <= 8) return (d >= 56) ? 1 : 0;
    if (blk_wh <= 16) return (d >= 40) ? 1 : 0;
    if (blk_wh <= 24) {
      if (d >= 32) return 3;
      if (d >= 16) return 2;
      if (d >= 8) return 1;
      return 0;
    }
    if (blk_wh <= 32) {
      if (d >= 32) return 3;
      if (d >= 4) return 2;
      if (d >= 1) return 1;
      return 0;
    }
    return (d >= 1) ? 3 : 0;
  }

  // type != 0: neighbours are smooth-predicted; filter more aggressively.
  if (blk_wh <= 8) {
    if (d >= 64) return 2;
    if (d >= 40) return 1;
    return 0;
  }
  if (blk_wh <= 16) {
    if (d >= 48) return 2;
    if (d >= 20) return 1;
    return 0;
  }
  if (blk_wh <= 24) return (d >= 4) ? 3 : 0;
  return (d >= 1) ? 3 : 0;
}
1015
// Low-pass filters the intra edge samples p[1..sz-1] in place with one of
// three 5-tap kernels chosen by strength (1..3); strength 0 is a no-op.
// p[0] is never modified. Out-of-range taps are clamped to the edge ends.
void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
  if (strength == 0) return;

  // 3 strengths (INTRA_EDGE_FILT) x 5 taps (INTRA_EDGE_TAPS); weights of each
  // kernel sum to 16.
  enum { kNumFilters = 3, kNumTaps = 5 };
  static const int kernels[kNumFilters][kNumTaps] = { { 0, 4, 8, 4, 0 },
                                                      { 0, 5, 6, 5, 0 },
                                                      { 2, 4, 4, 4, 2 } };
  const int *const kernel = kernels[strength - 1];
  uint8_t edge[129];

  // Filter from an unmodified copy so already-filtered samples are never
  // reused as inputs.
  memcpy(edge, p, sz * sizeof(*p));
  for (int i = 1; i < sz; i++) {
    int s = 0;
    for (int j = 0; j < kNumTaps; j++) {
      int k = i - 2 + j;
      if (k < 0) k = 0;
      if (k > sz - 1) k = sz - 1;
      s += edge[k] * kernel[j];
    }
    p[i] = (uint8_t)((s + 8) >> 4);
  }
}
1038
// Smooths the corner sample shared by the above row and the left column with
// a 3-tap [5, 6, 5] filter over (left[0], above[-1], above[0]) and stores the
// result into both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int sum = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint8_t filtered = (uint8_t)((sum + 8) >> 4);
  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1048
av1_upsample_intra_edge_c(uint8_t * p,int sz)1049 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1050 // interpolate half-sample positions
1051 assert(sz <= MAX_UPSAMPLE_SZ);
1052
1053 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1054 // copy p[-1..(sz-1)] and extend first and last samples
1055 in[0] = p[-1];
1056 in[1] = p[-1];
1057 for (int i = 0; i < sz; i++) {
1058 in[i + 2] = p[i];
1059 }
1060 in[sz + 2] = p[sz - 1];
1061
1062 // interpolate half-sample edge positions
1063 p[-2] = in[0];
1064 for (int i = 0; i < sz; i++) {
1065 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1066 s = clip_pixel((s + 8) >> 4);
1067 p[2 * i - 1] = s;
1068 p[2 * i] = in[i + 2];
1069 }
1070 }
1071
// Builds the above-row and left-column reference arrays for a directional or
// filter-intra predicted block, optionally filters/upsamples the edges, and
// invokes the matching predictor.
//
// n_top_px / n_left_px: number of reconstructed pixels available in the above
// row / left column (0 if none). n_topright_px / n_bottomleft_px: extra
// top-right / bottom-left pixel counts, where -1 means the extension is not
// used at all and 0 means it is used but unavailable (the edge is then padded
// by replicating the last available sample).
static void build_directional_and_filter_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so negative indices ([-1], [-2]) stay inside the buffers.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  assert(use_filter_intra || is_dr_mode);
  // The left_data, above_data buffers must be zeroed to fix some intermittent
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
  // be the potential reason for this issue.
  memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
  memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129 A B .. Y Z
  // 129 C D .. W X
  // 129 E F .. U V
  // 129 G H .. S T T T T T
  // ..

  // Directional modes override the per-mode NEED_* flags based on which side
  // of 90/180 degrees the prediction angle falls.
  if (is_dr_mode) {
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Degenerate case: the only edge the mode needs has no reconstructed
  // pixels. Fill the block with a single fallback value and return.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT: gather the left column (plus bottom-left extension when
  // requested), replicating the last sample or falling back to above_ref[0].
  if (need_left) {
    const int num_left_pixels_needed =
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      memset(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE: gather the above row (plus top-right extension when
  // requested), replicating the last sample or falling back to left_ref[0].
  if (need_above) {
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      memset(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Above-left corner: prefer the true corner, then the nearest available
  // edge sample, then the mid-level default 128.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  assert(is_dr_mode);
  int upsample_above = 0;
  int upsample_left = 0;
  // Optional edge processing before the directional prediction: corner
  // smoothing, edge low-pass filtering, and 2x edge upsampling.
  if (!disable_edge_filter) {
    const int need_right = p_angle < 90;
    const int need_bottom = p_angle > 180;
    if (p_angle != 90 && p_angle != 180) {
      assert(need_above_left);
      const int ab_le = 1;
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
        filter_intra_edge_corner(above_row, left_col);
      }
      if (need_above && n_top_px > 0) {
        const int strength = intra_edge_filter_strength(
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
        av1_filter_intra_edge(above_row - ab_le, n_px, strength);
      }
      if (need_left && n_left_px > 0) {
        const int strength = intra_edge_filter_strength(
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
        av1_filter_intra_edge(left_col - ab_le, n_px, strength);
      }
    }
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                 intra_edge_filter_type);
    if (need_above && upsample_above) {
      const int n_px = txwpx + (need_right ? txhpx : 0);
      av1_upsample_intra_edge(above_row, n_px);
    }
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                intra_edge_filter_type);
    if (need_left && upsample_left) {
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
      av1_upsample_intra_edge(left_col, n_px);
    }
  }
  dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
               upsample_left, p_angle);
}
1233
1234 // This function generates the pred data of a given block for non-directional
1235 // intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
static void build_non_directional_intra_predictors(
    const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int need_left = extend_modes[mode] & NEED_LEFT;
  const int need_above = extend_modes[mode] & NEED_ABOVE;
  const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int i = 0;
  // n_top_px / n_left_px: number of reconstructed reference pixels available
  // in the above row / left column (0 if none).
  assert(n_top_px >= 0);
  assert(n_left_px >= 0);
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);

  // Degenerate case: a required edge has no reconstructed pixels at all.
  // Fill the block with a single fallback value and return.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val = 0;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so the [-1] corner sample stays inside the buffers.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;

  // Gather the left column; pad by replicating the last available sample, or
  // fall back to above_ref[0] when no left pixels exist.
  if (need_left) {
    memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
    if (n_left_px > 0) {
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
    } else if (n_top_px > 0) {
      memset(left_col, above_ref[0], txhpx);
    }
  }

  // Gather the above row; pad by replicating the last available sample, or
  // fall back to left_ref[0] when no top pixels exist.
  if (need_above) {
    memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
    } else if (n_left_px > 0) {
      memset(above_row, left_ref[0], txwpx);
    }
  }

  // Above-left corner: prefer the true corner, then the nearest available
  // edge sample, then the mid-level default 128.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  // DC prediction has per-availability variants; other modes use the common
  // predictor table.
  if (mode == DC_PRED) {
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1312
1313 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth edge filter: low-pass filters p[1..sz-1] in place with one of
// three 5-tap kernels chosen by strength (1..3); strength 0 is a no-op.
// p[0] is never modified. Out-of-range taps are clamped to the edge ends.
void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
  if (strength == 0) return;

  // 3 strengths (INTRA_EDGE_FILT) x 5 taps (INTRA_EDGE_TAPS); weights of each
  // kernel sum to 16.
  enum { kNumFilters = 3, kNumTaps = 5 };
  static const int kernels[kNumFilters][kNumTaps] = { { 0, 4, 8, 4, 0 },
                                                      { 0, 5, 6, 5, 0 },
                                                      { 2, 4, 4, 4, 2 } };
  const int *const kernel = kernels[strength - 1];
  uint16_t edge[129];

  // Filter from an unmodified copy so already-filtered samples are never
  // reused as inputs.
  memcpy(edge, p, sz * sizeof(*p));
  for (int i = 1; i < sz; i++) {
    int s = 0;
    for (int j = 0; j < kNumTaps; j++) {
      int k = i - 2 + j;
      if (k < 0) k = 0;
      if (k > sz - 1) k = sz - 1;
      s += edge[k] * kernel[j];
    }
    p[i] = (uint16_t)((s + 8) >> 4);
  }
}
1336
// High-bitdepth corner smoothing: filters the corner sample shared by the
// above row and the left column with a 3-tap [5, 6, 5] kernel over
// (left[0], above[-1], above[0]) and stores the result into both
// p_above[-1] and p_left[-1].
static void highbd_filter_intra_edge_corner(uint16_t *p_above,
                                            uint16_t *p_left) {
  const int sum = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const uint16_t filtered = (uint16_t)((sum + 8) >> 4);
  p_above[-1] = filtered;
  p_left[-1] = filtered;
}
1347
av1_highbd_upsample_intra_edge_c(uint16_t * p,int sz,int bd)1348 void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1349 // interpolate half-sample positions
1350 assert(sz <= MAX_UPSAMPLE_SZ);
1351
1352 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1353 // copy p[-1..(sz-1)] and extend first and last samples
1354 in[0] = p[-1];
1355 in[1] = p[-1];
1356 for (int i = 0; i < sz; i++) {
1357 in[i + 2] = p[i];
1358 }
1359 in[sz + 2] = p[sz - 1];
1360
1361 // interpolate half-sample edge positions
1362 p[-2] = in[0];
1363 for (int i = 0; i < sz; i++) {
1364 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1365 s = (s + 8) >> 4;
1366 s = clip_pixel_highbd(s, bd);
1367 p[2 * i - 1] = s;
1368 p[2 * i] = in[i + 2];
1369 }
1370 }
1371
// High-bitdepth counterpart of build_directional_and_filter_intra_predictors:
// builds the above-row and left-column reference arrays, optionally
// filters/upsamples the edges, and invokes the matching predictor.
//
// n_top_px / n_left_px: number of reconstructed pixels available in the above
// row / left column (0 if none). n_topright_px / n_bottomleft_px: extra
// top-right / bottom-left pixel counts, where -1 means the extension is not
// used at all and 0 means it is used but unavailable (the edge is then padded
// by replicating the last available sample).
static void highbd_build_directional_and_filter_intra_predictors(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
    TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
    int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
    int bit_depth) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so negative indices ([-1], [-2]) stay inside the buffers.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  assert(use_filter_intra || is_dr_mode);
  // Mid-level value for this bit depth (the high-bitdepth analogue of 128).
  const int base = 128 << (bit_depth - 8);
  // The left_data, above_data buffers must be zeroed to fix some intermittent
  // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
  // path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
  // seen to be the potential reason for this issue.
  aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
  aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);

  // The default values if ref pixels are not available:
  // base base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1 A B .. Y Z
  // base+1 C D .. W X
  // base+1 E F .. U V
  // base+1 G H .. S T T T T T

  // Directional modes override the per-mode NEED_* flags based on which side
  // of 90/180 degrees the prediction angle falls.
  if (is_dr_mode) {
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= -1);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= -1);

  // Degenerate case: the only edge the mode needs has no reconstructed
  // pixels. Fill the block with a single fallback value and return.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT: gather the left column (plus bottom-left extension when
  // requested), replicating the last sample or falling back to above_ref[0].
  if (need_left) {
    const int num_left_pixels_needed =
        txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else if (n_top_px > 0) {
      aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
    }
  }

  // NEED_ABOVE: gather the above row (plus top-right extension when
  // requested), replicating the last sample or falling back to left_ref[0].
  if (need_above) {
    const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else if (n_left_px > 0) {
      aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
    }
  }

  // Above-left corner: prefer the true corner, then the nearest available
  // edge sample, then the mid-level default.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, bit_depth);
    return;
  }

  assert(is_dr_mode);
  int upsample_above = 0;
  int upsample_left = 0;
  // Optional edge processing before the directional prediction: corner
  // smoothing, edge low-pass filtering, and 2x edge upsampling.
  if (!disable_edge_filter) {
    const int need_right = p_angle < 90;
    const int need_bottom = p_angle > 180;
    if (p_angle != 90 && p_angle != 180) {
      assert(need_above_left);
      const int ab_le = 1;
      if (need_above && need_left && (txwpx + txhpx >= 24)) {
        highbd_filter_intra_edge_corner(above_row, left_col);
      }
      if (need_above && n_top_px > 0) {
        const int strength = intra_edge_filter_strength(
            txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
        const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
        av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
      }
      if (need_left && n_left_px > 0) {
        const int strength = intra_edge_filter_strength(
            txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
        const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
        av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
      }
    }
    upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
                                                 intra_edge_filter_type);
    if (need_above && upsample_above) {
      const int n_px = txwpx + (need_right ? txhpx : 0);
      av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
    }
    upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
                                                intra_edge_filter_type);
    if (need_left && upsample_left) {
      const int n_px = txhpx + (need_bottom ? txwpx : 0);
      av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
    }
  }
  highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                      upsample_above, upsample_left, p_angle, bit_depth);
}
1538
1539 // For HBD encode/decode, this function generates the pred data of a given
1540 // block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1541 // SMOOTH_V and PAETH).
static void highbd_build_non_directional_intra_predictors(
    const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
    PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
    int bit_depth) {
  int i = 0;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  // Mid-level value for this bit depth (the high-bitdepth analogue of 128).
  const int base = 128 << (bit_depth - 8);

  // n_top_px / n_left_px: number of reconstructed reference pixels available
  // in the above row / left column (0 if none).
  assert(n_top_px >= 0);
  assert(n_left_px >= 0);
  assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
         mode == SMOOTH_H_PRED || mode == PAETH_PRED);

  // Degenerate case: a required edge has no reconstructed pixels at all.
  // Fill the block with a single fallback value and return.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val = 0;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
  // Offset by 16 so the [-1] corner sample stays inside the buffers.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;

  // Gather the left column; pad by replicating the last available sample, or
  // fall back to above_ref[0] when no left pixels exist.
  if (need_left) {
    aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
    if (n_left_px > 0) {
      for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
    } else if (n_top_px > 0) {
      aom_memset16(left_col, above_ref[0], txhpx);
    }
  }

  // Gather the above row; pad by replicating the last available sample, or
  // fall back to left_ref[0] when no top pixels exist.
  if (need_above) {
    aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
    } else if (n_left_px > 0) {
      aom_memset16(above_row, left_ref[0], txwpx);
    }
  }

  // Above-left corner: prefer the true corner, then the nearest available
  // edge sample, then the mid-level default.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  // DC prediction has per-availability variants; other modes use the common
  // high-bitdepth predictor table.
  if (mode == DC_PRED) {
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, bit_depth);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
  }
}
1623 #endif // CONFIG_AV1_HIGHBITDEPTH
1624
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1625 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1626 int subsampling_y) {
1627 assert(subsampling_x >= 0 && subsampling_x < 2);
1628 assert(subsampling_y >= 0 && subsampling_y < 2);
1629 BLOCK_SIZE bs = bsize;
1630 switch (bsize) {
1631 case BLOCK_4X4:
1632 if (subsampling_x == 1 && subsampling_y == 1)
1633 bs = BLOCK_8X8;
1634 else if (subsampling_x == 1)
1635 bs = BLOCK_8X4;
1636 else if (subsampling_y == 1)
1637 bs = BLOCK_4X8;
1638 break;
1639 case BLOCK_4X8:
1640 if (subsampling_x == 1 && subsampling_y == 1)
1641 bs = BLOCK_8X8;
1642 else if (subsampling_x == 1)
1643 bs = BLOCK_8X8;
1644 else if (subsampling_y == 1)
1645 bs = BLOCK_4X8;
1646 break;
1647 case BLOCK_8X4:
1648 if (subsampling_x == 1 && subsampling_y == 1)
1649 bs = BLOCK_8X8;
1650 else if (subsampling_x == 1)
1651 bs = BLOCK_8X4;
1652 else if (subsampling_y == 1)
1653 bs = BLOCK_8X8;
1654 break;
1655 case BLOCK_4X16:
1656 if (subsampling_x == 1 && subsampling_y == 1)
1657 bs = BLOCK_8X16;
1658 else if (subsampling_x == 1)
1659 bs = BLOCK_8X16;
1660 else if (subsampling_y == 1)
1661 bs = BLOCK_4X16;
1662 break;
1663 case BLOCK_16X4:
1664 if (subsampling_x == 1 && subsampling_y == 1)
1665 bs = BLOCK_16X8;
1666 else if (subsampling_x == 1)
1667 bs = BLOCK_16X4;
1668 else if (subsampling_y == 1)
1669 bs = BLOCK_16X8;
1670 break;
1671 default: break;
1672 }
1673 return bs;
1674 }
1675
// Generates the intra prediction for one transform block of `plane`, writing
// txwpx x txhpx pixels to `dst`. Handles four prediction families: palette,
// non-directional (DC/SMOOTH*/PAETH), directional, and filter intra. `ref`
// supplies the reconstructed neighbor pixels; `wpx`/`hpx` are the plane block
// dimensions in pixels and `col_off`/`row_off` locate this transform block
// within it (in 4-pixel/MI units).
void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
                             int enable_intra_edge_filter, int wpx, int hpx,
                             TX_SIZE tx_size, PREDICTION_MODE mode,
                             int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode,
                             const uint8_t *ref, int ref_stride, uint8_t *dst,
                             int dst_stride, int col_off, int row_off,
                             int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel offset of this transform block inside the prediction block.
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;
  const int is_hbd = is_cur_buf_hbd(xd);

  assert(mode < INTRA_MODES);

  if (use_palette) {
    // Palette prediction: copy colors straight from the palette via the
    // per-pixel color index map; no neighbor pixels are needed.
    int r, c;
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
                               xd->color_index_map_offset[plane != 0];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_hbd) {
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (r = 0; r < txhpx; ++r) {
        for (c = 0; c < txwpx; ++c) {
          dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
        }
      }
    } else {
      for (r = 0; r < txhpx; ++r) {
        for (c = 0; c < txwpx; ++c) {
          dst[r * dst_stride + c] =
              (uint8_t)palette[map[(r + y) * wpx + c + x]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  // A non-zero row/col offset means the neighbor row/column lies inside this
  // block and is always available; otherwise availability comes from xd
  // (chroma availability differs from luma when the plane is subsampled).
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  const int is_dr_mode = av1_is_directional_mode(mode);

  // The computations in this function, as well as in build_intra_predictors(),
  // are generalized for all intra modes. Some of these operations are not
  // required since non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
  // SMOOTH_V, and PAETH) specifically require left and top neighbors. Hence, a
  // separate function build_non_directional_intra_predictors() is introduced
  // for these modes to avoid redundant computations while generating pred data.

  // Number of top/left neighbor pixels actually inside the frame (clamped by
  // the distances to the frame edges computed above); 0 when unavailable.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  if (!use_filter_intra && !is_dr_mode) {
    // Fast path for non-directional modes: only top/left neighbors matter.
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_hbd) {
      highbd_build_non_directional_intra_predictors(
          ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
          xd->bd);
      return;
    }
#endif  // CONFIG_AV1_HIGHBITDEPTH
    build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
                                           mode, tx_size, n_top_px, n_left_px);
    return;
  }

  // Directional / filter-intra path: also determine top-right and bottom-left
  // neighbor availability, bounded by the tile edges.
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  BLOCK_SIZE bsize = mbmi->bsize;
  // force 4x4 chroma component block size.
  if (ss_x || ss_y) {
    bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
  }

  int p_angle = 0;
  int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
  int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;

  if (use_filter_intra) {
    // Filter intra only ever reads the immediate top row and left column.
    need_top_right = 0;
    need_bottom_left = 0;
  }
  if (is_dr_mode) {
    // Directional modes: angles below 90 degrees project into the top-right
    // neighbors; angles above 180 project into the bottom-left neighbors.
    p_angle = mode_to_angle_map[mode] + angle_delta;
    need_top_right = p_angle < 90;
    need_bottom_left = p_angle > 180;
  }

  // Possible states for have_top_right(TR) and have_bottom_left(BL)
  // -1 : TR and BL are not needed
  // 0 : TR and BL are needed but not available
  // > 0 : TR and BL are needed and pixels are available
  const int have_top_right =
      need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
                                     right_available, partition, tx_size,
                                     row_off, col_off, ss_x, ss_y)
                     : -1;
  const int have_bottom_left =
      need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
                                         bottom_available, have_left, partition,
                                         tx_size, row_off, col_off, ss_x, ss_y)
                       : -1;

  const int disable_edge_filter = !enable_intra_edge_filter;
  const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
  // Negative/zero counts propagate the tri-state above to the builders.
  const int n_topright_px =
      have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
  const int n_bottomleft_px =
      have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_hbd) {
    highbd_build_directional_and_filter_intra_predictors(
        ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
        tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
        n_bottomleft_px, intra_edge_filter_type, xd->bd);
    return;
  }
#endif
  build_directional_and_filter_intra_predictors(
      ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
      tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
      n_bottomleft_px, intra_edge_filter_type);
}
1823
// Convenience wrapper around av1_predict_intra_block() that pulls the mode,
// palette, filter-intra and angle parameters out of xd->mi[0], predicts in
// place into the plane's reconstruction buffer, and handles the chroma
// CfL (chroma-from-luma) path, including its cached DC prediction.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dst_stride = pd->dst.stride;
  // Prediction is done in place: dst doubles as the neighbor reference below.
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
  const PREDICTION_MODE mode =
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  // FILTER_INTRA_MODES acts as the "disabled" sentinel; filter intra only
  // applies to the luma plane.
  const FILTER_INTRA_MODE filter_intra_mode =
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
          ? mbmi->filter_intra_mode_info.filter_intra_mode
          : FILTER_INTRA_MODES;
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
  const SequenceHeader *seq_params = cm->seq_params;

  if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
    // CfL: first produce the DC prediction for this chroma block (or load it
    // from the cache), then add the scaled luma AC contribution.
#if CONFIG_DEBUG
    assert(is_cfl_allowed(xd));
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    (void)plane_bsize;
    assert(plane_bsize < BLOCK_SIZES_ALL);
    if (!xd->lossless[mbmi->segment_id]) {
      // Outside lossless, a CfL block is a single transform block at (0, 0).
      assert(blk_col == 0);
      assert(blk_row == 0);
      assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
      assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
    }
#endif
    CFL_CTX *const cfl = &xd->cfl;
    CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
    if (!cfl->dc_pred_is_cached[pred_plane]) {
      av1_predict_intra_block(xd, seq_params->sb_size,
                              seq_params->enable_intra_edge_filter, pd->width,
                              pd->height, tx_size, mode, angle_delta,
                              use_palette, filter_intra_mode, dst, dst_stride,
                              dst, dst_stride, blk_col, blk_row, plane);
      if (cfl->use_dc_pred_cache) {
        cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
        cfl->dc_pred_is_cached[pred_plane] = true;
      }
    } else {
      cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
    }
    av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
    return;
  }
  av1_predict_intra_block(
      xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
      pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
      dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
}
1878
// One-time, thread-safe initialization of the intra predictor function
// tables (aom_once guarantees init_intra_predictors_internal runs once).
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1882