1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/cfl.h"
25 #include "av1/common/reconintra.h"
26
// Bit flags naming the neighbouring edges an intra prediction mode reads.
// They are OR-ed together in extend_modes[]. Bit 0 is not used.
enum {
  NEED_LEFT = 0x02,
  NEED_ABOVE = 0x04,
  NEED_ABOVERIGHT = 0x08,
  NEED_ABOVELEFT = 0x10,
  NEED_BOTTOMLEFT = 0x20,
};
34
// NOTE(review): the uses of these three constants are outside this excerpt;
// the descriptions are inferred from the names only and must be confirmed.
// INTRA_EDGE_FILT: presumably the number of intra edge filter kernels.
#define INTRA_EDGE_FILT 3
// INTRA_EDGE_TAPS: presumably the tap count of each edge filter kernel.
#define INTRA_EDGE_TAPS 5
// MAX_UPSAMPLE_SZ: presumably the longest edge that may be upsampled.
#define MAX_UPSAMPLE_SZ 16
38
39 static const uint8_t extend_modes[INTRA_MODES] = {
40 NEED_ABOVE | NEED_LEFT, // DC
41 NEED_ABOVE, // V
42 NEED_LEFT, // H
43 NEED_ABOVE | NEED_ABOVERIGHT, // D45
44 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
45 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
46 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
47 NEED_LEFT | NEED_BOTTOMLEFT, // D203
48 NEED_ABOVE | NEED_ABOVERIGHT, // D67
49 NEED_LEFT | NEED_ABOVE, // SMOOTH
50 NEED_LEFT | NEED_ABOVE, // SMOOTH_V
51 NEED_LEFT | NEED_ABOVE, // SMOOTH_H
52 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH
53 };
54
55 // Tables to store if the top-right reference pixels are available. The flags
56 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
57 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
58 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
59 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
60 // . . . .
61 // . . . .
62 // . . o .
63 // . . . .
// Top-right availability bitmaps, one per block size. Bit (i % 8) of entry
// (i / 8) is the flag of the block whose raster index is i.
// The tables are pure lookup data and are only reached through the
// const-qualified pointers in has_tr_tables / has_tr_vert_tables, so they are
// declared const to keep them in read-only storage and to make accidental
// writes a compile error.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129 // 4X4
130 has_tr_4x4,
131 // 4X8, 8X4, 8X8
132 has_tr_4x8, has_tr_8x4, has_tr_8x8,
133 // 8X16, 16X8, 16X16
134 has_tr_8x16, has_tr_16x8, has_tr_16x16,
135 // 16X32, 32X16, 32X32
136 has_tr_16x32, has_tr_32x16, has_tr_32x32,
137 // 32X64, 64X32, 64X64
138 has_tr_32x64, has_tr_64x32, has_tr_64x64,
139 // 64x128, 128x64, 128x128
140 has_tr_64x128, has_tr_128x64, has_tr_128x128,
141 // 4x16, 16x4, 8x32
142 has_tr_4x16, has_tr_16x4, has_tr_8x32,
143 // 32x8, 16x64, 64x16
144 has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146
// Top-right availability bitmaps for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_tr_* tables.
// Declared const: they are lookup data reached only through the
// const-qualified pointers in has_tr_vert_tables.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table; horizontal rectangles
// and BLOCK_4X4 have no entry (NULL).
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167 // 4X4
168 NULL,
169 // 4X8, 8X4, 8X8
170 has_tr_4x8, NULL, has_tr_vert_8x8,
171 // 8X16, 16X8, 16X16
172 has_tr_8x16, NULL, has_tr_vert_16x16,
173 // 16X32, 32X16, 32X32
174 has_tr_16x32, NULL, has_tr_vert_32x32,
175 // 32X64, 64X32, 64X64
176 has_tr_32x64, NULL, has_tr_vert_64x64,
177 // 64x128, 128x64, 128x128
178 has_tr_64x128, NULL, has_tr_128x128
179 };
180
// Picks the top-right availability bitmap for a block of size |bsize|.
// PARTITION_VERT_A and PARTITION_VERT_B are served from has_tr_vert_tables
// (|bsize| is asserted to be below BLOCK_SIZES); every other partition type is
// served from has_tr_tables. The selected table is asserted to be non-NULL.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);

  if (!is_mixed_vert) {
    const uint8_t *const table = has_tr_tables[bsize];
    assert(table);
    return table;
  }

  assert(bsize < BLOCK_SIZES);
  const uint8_t *const vert_table = has_tr_vert_tables[bsize];
  assert(vert_table);
  return vert_table;
}
194
// Reports whether the reference pixels above and to the right of a transform
// block may be used.
//
//   cm                 supplies the superblock size (seq_params.sb_size).
//   bsize              size of the coding block.
//   mi_row, mi_col     position of the coding block; only the part inside the
//                      superblock is used.
//   top_available,
//   right_available    caller-provided flags; the result is 0 if either is 0.
//   partition          selects the coding-order bitmap (get_has_tr_table()).
//   txsz               transform size; tx_size_wide_unit[txsz] is the number
//                      of units needed to the right.
//   row_off, col_off   offset of the transform block inside the coding block,
//                      in the units of tx_size_wide_unit[].
//   ss_x, ss_y         right-shifts applied to block widths / heights.
//
// Returns 1 when the top-right pixels are available, 0 otherwise.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int plane_bw_unit = AOMMAX(mi_size_wide[bsize] >> ss_x, 1);
  const int tr_units = tx_size_wide_unit[txsz];
  const int tr_end = col_off + tr_units;

  if (row_off > 0) {
    // Below the first row of the block: it only matters whether enough
    // columns remain to the right.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tr_end < plane_bw_unit;
    }

    // Blocks wider than 64 are handled per 64-wide column.
    const int bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: the transform unit whose top-right corner sits at the
    // centre of the block does have top-right pixels.
    if (row_off == bh_unit_64 && tr_end == bw_unit_64) return 1;

    return (col_off % bw_unit_64) + tr_units < bw_unit_64;
  }

  // First row of the block. If the needed columns still lie above this block
  // they belong to the block above, which is already available.
  if (tr_end < plane_bw_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Top row of the superblock: the pixels come from the superblocks above
  // and/or above-right, both already available.
  if (sb_blk_row == 0) return 1;

  // Rightmost column of the superblock (not its top row): the pixels would
  // come from the superblock to the right, which is not available yet.
  if (((sb_blk_col + 1) << bw_log2) >= sb_mi) return 0;

  // Otherwise consult the bitmap: was the top-right block coded before this
  // one?
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_tr_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
250
251 // Similar to the has_tr_* tables, but store if the bottom-left reference
252 // pixels are available.
// Bottom-left availability bitmaps, one per block size. Bit (i % 8) of entry
// (i / 8) is the flag of the block whose raster index is i.
// The tables are pure lookup data and are only reached through the
// const-qualified pointers in has_bl_tables / has_bl_vert_tables, so they are
// declared const to keep them in read-only storage and to make accidental
// writes a compile error.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1, 0, 84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0, 0, 0, 0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0, 1, 1, 1, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 1,
  0, 84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 1, 1, 1, 84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0, 0, 0, 0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314 // 4X4
315 has_bl_4x4,
316 // 4X8, 8X4, 8X8
317 has_bl_4x8, has_bl_8x4, has_bl_8x8,
318 // 8X16, 16X8, 16X16
319 has_bl_8x16, has_bl_16x8, has_bl_16x16,
320 // 16X32, 32X16, 32X32
321 has_bl_16x32, has_bl_32x16, has_bl_32x32,
322 // 32X64, 64X32, 64X64
323 has_bl_32x64, has_bl_64x32, has_bl_64x64,
324 // 64x128, 128x64, 128x128
325 has_bl_64x128, has_bl_128x64, has_bl_128x128,
326 // 4x16, 16x4, 8x32
327 has_bl_4x16, has_bl_16x4, has_bl_8x32,
328 // 32x8, 16x64, 64x16
329 has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331
// Bottom-left availability bitmaps for square blocks coded under
// PARTITION_VERT_A / PARTITION_VERT_B. Same bit packing as the has_bl_* tables.
// Declared const: they are lookup data reached only through the
// const-qualified pointers in has_bl_vert_tables.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
// There are tables for each of the square sizes. Vertical rectangles (like
// BLOCK_16X32) use their respective "non-vert" table; horizontal rectangles
// and BLOCK_4X4 have no entry (NULL).
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352 // 4X4
353 NULL,
354 // 4X8, 8X4, 8X8
355 has_bl_4x8, NULL, has_bl_vert_8x8,
356 // 8X16, 16X8, 16X16
357 has_bl_8x16, NULL, has_bl_vert_16x16,
358 // 16X32, 32X16, 32X32
359 has_bl_16x32, NULL, has_bl_vert_32x32,
360 // 32X64, 64X32, 64X64
361 has_bl_32x64, NULL, has_bl_vert_64x64,
362 // 64x128, 128x64, 128x128
363 has_bl_64x128, NULL, has_bl_128x128
364 };
365
// Picks the bottom-left availability bitmap for a block of size |bsize|.
// PARTITION_VERT_A and PARTITION_VERT_B are served from has_bl_vert_tables
// (|bsize| is asserted to be below BLOCK_SIZES); every other partition type is
// served from has_bl_tables. The selected table is asserted to be non-NULL.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);

  if (!is_mixed_vert) {
    const uint8_t *const table = has_bl_tables[bsize];
    assert(table);
    return table;
  }

  assert(bsize < BLOCK_SIZES);
  const uint8_t *const vert_table = has_bl_vert_tables[bsize];
  assert(vert_table);
  return vert_table;
}
379
// Reports whether the reference pixels below and to the left of a transform
// block may be used.
//
//   cm                 supplies the superblock size (seq_params.sb_size).
//   bsize              size of the coding block.
//   mi_row, mi_col     position of the coding block; only the part inside the
//                      superblock is used.
//   bottom_available,
//   left_available     caller-provided flags; the result is 0 if either is 0.
//   partition          selects the coding-order bitmap (get_has_bl_table()).
//   txsz               transform size; tx_size_high_unit[txsz] is the number
//                      of units needed below.
//   row_off, col_off   offset of the transform block inside the coding block,
//                      in the units of tx_size_high_unit[].
//   ss_x, ss_y         right-shifts applied to block widths / heights.
//
// Returns 1 when the bottom-left pixels are available, 0 otherwise.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  const int bl_units = tx_size_high_unit[txsz];

  if (col_off > 0) {
    // Blocks wider than 64 are split into 64-wide pieces in raster order, so a
    // transform block on the left edge of a right-hand piece may still see the
    // left piece, which is already coded.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % bw_unit_64 == 0) {
        const int bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
        const int piece_bh_unit =
            AOMMIN(mi_size_high[bsize] >> ss_y, bh_unit_64);
        // Available only if every needed row lies inside the left piece.
        return (row_off % bh_unit_64) + bl_units < piece_bh_unit;
      }
    }
    // Otherwise the pixels lie in the bottom-left block, which is not
    // available.
    return 0;
  }

  // Left edge of the block. If the needed rows still lie beside this block
  // they belong to the left block, which is already available.
  const int plane_bh_unit = AOMMAX(mi_size_high[bsize] >> ss_y, 1);
  if (row_off + bl_units < plane_bh_unit) return 1;

  const int bw_log2 = mi_size_wide_log2[bsize];
  const int bh_log2 = mi_size_high_log2[bsize];
  const int sb_mi = mi_size_high[cm->seq_params.sb_size];
  const int sb_mask = sb_mi - 1;
  const int sb_blk_row = (mi_row & sb_mask) >> bh_log2;
  const int sb_blk_col = (mi_col & sb_mask) >> bw_log2;

  // Leftmost column of the superblock: the pixels may be in the left and/or
  // bottom-left superblock, but only the left one is available, so all needed
  // rows must fall inside the superblock's height.
  if (sb_blk_col == 0) {
    const int blk_start_row_off = (sb_blk_row << bh_log2) >> ss_y;
    const int row_off_in_sb = blk_start_row_off + row_off;
    const int sb_height_unit = sb_mi >> ss_y;
    return row_off_in_sb + bl_units < sb_height_unit;
  }

  // Bottom row of the superblock (not its leftmost column): the pixels would
  // come from the superblock below, which is not available yet.
  if (((sb_blk_row + 1) << bh_log2) >= sb_mi) return 0;

  // Otherwise consult the bitmap: was the bottom-left block coded before this
  // one?
  const int blk_index =
      (sb_blk_row << (MAX_MIB_SIZE_LOG2 - bw_log2)) + sb_blk_col;
  const uint8_t *const table = get_has_bl_table(partition, bsize);
  return (table[blk_index / 8] >> (blk_index % 8)) & 1;
}
448
// Signature of an 8-bit intra predictor: writes the prediction to |dst| (row
// pitch |stride|) from the |above| and |left| reference arrays.
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

// Predictor dispatch table indexed by [intra mode][transform size]. It is
// filled by init_intra_predictors_internal(), which sets only the V, H, PAETH
// and SMOOTH* rows.
static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
// DC predictor dispatch table. init_intra_predictors_internal() stores the
// dc_128 variants at [0][0], dc_top at [0][1], dc_left at [1][0] and dc at
// [1][1]; the last index is the transform size.
static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];

#if CONFIG_AV1_HIGHBITDEPTH
// 16-bit sample counterpart of intra_pred_fn with an extra |bd| argument
// (NOTE(review): presumably the bit depth -- confirm).
typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
                                   const uint16_t *above, const uint16_t *left,
                                   int bd);
// High-bitdepth counterparts of pred[] and dc_pred[], laid out the same way.
static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
#endif
462
init_intra_predictors_internal(void)463 static void init_intra_predictors_internal(void) {
464 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
465
466 #define INIT_RECTANGULAR(p, type) \
467 p[TX_4X8] = aom_##type##_predictor_4x8; \
468 p[TX_8X4] = aom_##type##_predictor_8x4; \
469 p[TX_8X16] = aom_##type##_predictor_8x16; \
470 p[TX_16X8] = aom_##type##_predictor_16x8; \
471 p[TX_16X32] = aom_##type##_predictor_16x32; \
472 p[TX_32X16] = aom_##type##_predictor_32x16; \
473 p[TX_32X64] = aom_##type##_predictor_32x64; \
474 p[TX_64X32] = aom_##type##_predictor_64x32; \
475 p[TX_4X16] = aom_##type##_predictor_4x16; \
476 p[TX_16X4] = aom_##type##_predictor_16x4; \
477 p[TX_8X32] = aom_##type##_predictor_8x32; \
478 p[TX_32X8] = aom_##type##_predictor_32x8; \
479 p[TX_16X64] = aom_##type##_predictor_16x64; \
480 p[TX_64X16] = aom_##type##_predictor_64x16;
481
482 #define INIT_NO_4X4(p, type) \
483 p[TX_8X8] = aom_##type##_predictor_8x8; \
484 p[TX_16X16] = aom_##type##_predictor_16x16; \
485 p[TX_32X32] = aom_##type##_predictor_32x32; \
486 p[TX_64X64] = aom_##type##_predictor_64x64; \
487 INIT_RECTANGULAR(p, type)
488
489 #define INIT_ALL_SIZES(p, type) \
490 p[TX_4X4] = aom_##type##_predictor_4x4; \
491 INIT_NO_4X4(p, type)
492
493 INIT_ALL_SIZES(pred[V_PRED], v);
494 INIT_ALL_SIZES(pred[H_PRED], h);
495 INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
496 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
497 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
498 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
499 INIT_ALL_SIZES(dc_pred[0][0], dc_128);
500 INIT_ALL_SIZES(dc_pred[0][1], dc_top);
501 INIT_ALL_SIZES(dc_pred[1][0], dc_left);
502 INIT_ALL_SIZES(dc_pred[1][1], dc);
503 #if CONFIG_AV1_HIGHBITDEPTH
504 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
505 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
506 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
507 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
508 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
509 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
510 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
511 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
512 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
513 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
514 #endif
515 #undef intra_pred_allsizes
516 }
517
518 // Directional prediction, zone 1: 0 < angle < 90
// Predicts a bw x bh block from the |above| reference only. |left| and |dy|
// are unused (|dy| is asserted to be 1).
// For row r the reference position is (r + 1) * dx, a fixed-point value with 6
// fractional bits. Each output pixel blends two neighbouring |above| samples
// with a 5-bit weight. Positions at or beyond max_base_x take the value
// above[max_base_x].
// |upsample_above| doubles the index range and step when it is 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    const int row_base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (row_base >= max_base_x) {
      // This row and all rows after it start past the last usable sample:
      // flood them with it and stop.
      for (; r < bh; ++r) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    int base = row_base;
    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
555
556 // Directional prediction, zone 2: 90 < angle < 180
// Predicts a bw x bh block using both the |above| and |left| references.
// For pixel (r, c) the position x = (c << 6) - (r + 1) * dx is computed first.
// If its integer part is not below min_base_x the pixel is interpolated from
// |above|. Otherwise y = (r << 6) - (c + 1) * dy is used to interpolate from
// |left|. Both arrays are read at negative indices (down to
// -(1 << upsample)), so the caller must provide storage before element 0.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const uint8_t *ref;  // first of the two samples to blend
      int shift;           // 5-bit weight of the second sample

      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        ref = above + base_x;
        // Multiply instead of shifting: x may be negative here.
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left + base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int val = ref[0] * (32 - shift) + ref[1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
594
595 // Directional prediction, zone 3: 180 < angle < 270
// Predicts a bw x bh block from the |left| reference only. |above| and |dx|
// are unused (|dx| is asserted to be 1).
// The block is produced column by column. For column c the reference position
// is (c + 1) * dy, a fixed-point value with 6 fractional bits. Going down the
// column, each pixel blends two neighbouring |left| samples with a 5-bit
// weight. Once the index reaches max_base_y the rest of the column is filled
// with left[max_base_y].
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // Remainder of the column (if any) repeats the last usable sample.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
626
// Dispatches an 8-bit directional prediction by |angle| (asserted to lie
// strictly between 0 and 270):
//   exactly 90 / 180  -> the V_PRED / H_PRED entry of pred[] for |tx_size|;
//   (0, 90)           -> av1_dr_prediction_z1;
//   (90, 180)         -> av1_dr_prediction_z2;
//   (180, 270)        -> av1_dr_prediction_z3.
// dx / dy come from av1_get_dx() / av1_get_dy(). The block dimensions come
// from tx_size_wide[] / tx_size_high[].
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
651
652 #if CONFIG_AV1_HIGHBITDEPTH
653 // Directional prediction, zone 1: 0 < angle < 90
// 16-bit sample version of the zone-1 predictor: predicts a bw x bh block from
// the |above| reference only. |left|, |dy| and |bd| are unused (|dy| is
// asserted to be 1).
// For row r the reference position is (r + 1) * dx, a fixed-point value with 6
// fractional bits. Each output pixel blends two neighbouring |above| samples
// with a 5-bit weight. Positions at or beyond max_base_x take the value
// above[max_base_x].
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    const int row_base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (row_base >= max_base_x) {
      // This row and all rows after it start past the last usable sample:
      // flood them with it and stop.
      for (; r < bh; ++r) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    int base = row_base;
    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
692
693 // Directional prediction, zone 2: 90 < angle < 180
// 16-bit sample version of the zone-2 predictor: predicts a bw x bh block
// using both the |above| and |left| references. |bd| is unused.
// For pixel (r, c) the position x = (c << 6) - (r + 1) * dx is computed first.
// If its integer part is not below min_base_x the pixel is interpolated from
// |above|. Otherwise y = (r << 6) - (c + 1) * dy is used to interpolate from
// |left|. Both arrays are read at negative indices (down to
// -(1 << upsample)), so the caller must provide storage before element 0.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const uint16_t *ref;  // first of the two samples to blend
      int shift;            // 5-bit weight of the second sample

      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      if (base_x >= min_base_x) {
        ref = above + base_x;
        // Multiply instead of shifting: x may be negative here.
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        ref = left + base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int val = ref[0] * (32 - shift) + ref[1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
732
733 // Directional prediction, zone 3: 180 < angle < 270
// 16-bit sample version of the zone-3 predictor: predicts a bw x bh block from
// the |left| reference only. |above|, |dx| and |bd| are unused (|dx| is
// asserted to be 1).
// The block is produced column by column. For column c the reference position
// is (c + 1) * dy, a fixed-point value with 6 fractional bits. Going down the
// column, each pixel blends two neighbouring |left| samples with a 5-bit
// weight. Once the index reaches max_base_y the rest of the column is filled
// with left[max_base_y].
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int base = y >> frac_bits;
    int r = 0;

    // Interpolated part of the column.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }
    // Remainder of the column (if any) repeats the last usable sample.
    for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
  }
}
765
// Dispatches a 16-bit directional prediction by |angle| (asserted to lie
// strictly between 0 and 270):
//   exactly 90 / 180  -> the V_PRED / H_PRED entry of pred_high[] for
//                        |tx_size|;
//   (0, 90)           -> av1_highbd_dr_prediction_z1;
//   (90, 180)         -> av1_highbd_dr_prediction_z2;
//   (180, 270)        -> av1_highbd_dr_prediction_z3.
// |bd| is forwarded unchanged to whichever predictor is called.
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle > 90 && angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else if (angle > 180 && angle < 270) {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
791 #endif // CONFIG_AV1_HIGHBITDEPTH
792
// Filter weights used by the filter-intra predictors in this file, indexed as
// av1_filter_intra_taps[mode][k][tap].
//  - mode: one of the FILTER_INTRA_MODES filter sets.
//  - k:    output sample index within a 4x2 patch; the predictors map it to
//          row offset (k >> 2) and column offset (k & 3).
//  - tap:  entries 0..6 weight the seven neighbouring samples p0..p6 (p0 is
//          the above-left sample, p1..p4 the four samples above, p5 and p6
//          the two samples to the left). Entry 7 is 0 in every row and is not
//          read by the C predictors in this file (presumably padding for the
//          16-byte aligned layout -- confirm against the SIMD versions).
DECLARE_ALIGNED(16, const int8_t,
                av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
  {
      { -6, 10, 0, 0, 0, 12, 0, 0 },
      { -5, 2, 10, 0, 0, 9, 0, 0 },
      { -3, 1, 1, 10, 0, 7, 0, 0 },
      { -3, 1, 1, 2, 10, 5, 0, 0 },
      { -4, 6, 0, 0, 0, 2, 12, 0 },
      { -3, 2, 6, 0, 0, 2, 9, 0 },
      { -3, 2, 2, 6, 0, 2, 7, 0 },
      { -3, 1, 2, 2, 6, 3, 5, 0 },
  },
  {
      { -10, 16, 0, 0, 0, 10, 0, 0 },
      { -6, 0, 16, 0, 0, 6, 0, 0 },
      { -4, 0, 0, 16, 0, 4, 0, 0 },
      { -2, 0, 0, 0, 16, 2, 0, 0 },
      { -10, 16, 0, 0, 0, 0, 10, 0 },
      { -6, 0, 16, 0, 0, 0, 6, 0 },
      { -4, 0, 0, 16, 0, 0, 4, 0 },
      { -2, 0, 0, 0, 16, 0, 2, 0 },
  },
  {
      { -8, 8, 0, 0, 0, 16, 0, 0 },
      { -8, 0, 8, 0, 0, 16, 0, 0 },
      { -8, 0, 0, 8, 0, 16, 0, 0 },
      { -8, 0, 0, 0, 8, 16, 0, 0 },
      { -4, 4, 0, 0, 0, 0, 16, 0 },
      { -4, 0, 4, 0, 0, 0, 16, 0 },
      { -4, 0, 0, 4, 0, 0, 16, 0 },
      { -4, 0, 0, 0, 4, 0, 16, 0 },
  },
  {
      { -2, 8, 0, 0, 0, 10, 0, 0 },
      { -1, 3, 8, 0, 0, 6, 0, 0 },
      { -1, 2, 3, 8, 0, 4, 0, 0 },
      { 0, 1, 2, 3, 8, 2, 0, 0 },
      { -1, 4, 0, 0, 0, 3, 10, 0 },
      { -1, 3, 4, 0, 0, 4, 6, 0 },
      { -1, 2, 3, 4, 0, 4, 4, 0 },
      { -1, 2, 2, 3, 4, 3, 3, 0 },
  },
  {
      { -12, 14, 0, 0, 0, 14, 0, 0 },
      { -10, 0, 14, 0, 0, 12, 0, 0 },
      { -9, 0, 0, 14, 0, 11, 0, 0 },
      { -8, 0, 0, 0, 14, 10, 0, 0 },
      { -10, 12, 0, 0, 0, 0, 14, 0 },
      { -9, 1, 12, 0, 0, 0, 12, 0 },
      { -8, 0, 0, 12, 0, 1, 11, 0 },
      { -7, 0, 0, 1, 12, 1, 9, 0 },
  },
};
846
// Filter-intra prediction for 8-bit samples.
//
// The block is predicted in 4x2 patches, visited in raster order. Each patch
// is a weighted sum (weights from av1_filter_intra_taps[mode]) of seven
// neighbours: the above-left sample, the four samples above, and the two
// samples to the left. Neighbours come from `above`/`left` on the block border
// and from previously predicted patches elsewhere.
//
// dst/stride: destination block and its row pitch.
// tx_size:    transform size; both dimensions must be <= 32.
// above:      row above the block; above[-1] is also read (top-left sample).
// left:       column to the left of the block.
// mode:       index of the filter set in av1_filter_intra_taps.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  // Working area: row 0 / column 0 hold the border, the rest the prediction.
  uint8_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));
  }

  // Seed the border: top-left + above row into row 0, left column into col 0.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      const uint8_t *const top = &buffer[row - 1][col - 1];
      // Neighbours p0..p6, captured before the patch is written.
      const int ref[7] = { top[0],
                           top[1],
                           top[2],
                           top[3],
                           top[4],
                           buffer[row][col - 1],
                           buffer[row + 1][col - 1] };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * ref[t];
        // k enumerates the 4x2 patch in raster order.
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  // Copy the predicted interior (without the border) to the destination.
  for (int row = 0; row < bh; ++row) {
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(uint8_t));
    dst += stride;
  }
}
894
895 #if CONFIG_AV1_HIGHBITDEPTH
// Filter-intra prediction for high-bitdepth (16-bit storage) samples.
//
// Same scheme as the 8-bit predictor: the block is produced in 4x2 patches in
// raster order, each one a weighted sum (av1_filter_intra_taps[mode]) of the
// above-left sample, the four samples above, and the two samples to the left.
// Results are clipped with clip_pixel_highbd() for bit depth `bd`.
//
// above[-1] is read as the top-left sample; both block dimensions must be
// <= 32.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  // Working area: row 0 / column 0 hold the border, the rest the prediction.
  uint16_t buffer[33][33];
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));
  }

  // Seed the border: top-left + above row into row 0, left column into col 0.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
  for (int row = 0; row < bh; ++row) buffer[row + 1][0] = left[row];

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      const uint16_t *const top = &buffer[row - 1][col - 1];
      // Neighbours p0..p6, captured before the patch is written.
      const int ref[7] = { top[0],
                           top[1],
                           top[2],
                           top[3],
                           top[4],
                           buffer[row][col - 1],
                           buffer[row + 1][col - 1] };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) sum += taps[t] * ref[t];
        // k enumerates the 4x2 patch in raster order.
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  // Copy the predicted interior (without the border) to the destination.
  for (int row = 0; row < bh; ++row) {
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(dst[0]));
    dst += stride;
  }
}
946 #endif // CONFIG_AV1_HIGHBITDEPTH
947
is_smooth(const MB_MODE_INFO * mbmi,int plane)948 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
949 if (plane == 0) {
950 const PREDICTION_MODE mode = mbmi->mode;
951 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
952 mode == SMOOTH_H_PRED);
953 } else {
954 // uv_mode is not set for inter blocks, so need to explicitly
955 // detect that case.
956 if (is_inter_block(mbmi)) return 0;
957
958 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
959 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
960 uv_mode == UV_SMOOTH_H_PRED);
961 }
962 }
963
get_filt_type(const MACROBLOCKD * xd,int plane)964 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
965 int ab_sm, le_sm;
966
967 if (plane == 0) {
968 const MB_MODE_INFO *ab = xd->above_mbmi;
969 const MB_MODE_INFO *le = xd->left_mbmi;
970 ab_sm = ab ? is_smooth(ab, plane) : 0;
971 le_sm = le ? is_smooth(le, plane) : 0;
972 } else {
973 const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
974 const MB_MODE_INFO *le = xd->chroma_left_mbmi;
975 ab_sm = ab ? is_smooth(ab, plane) : 0;
976 le_sm = le ? is_smooth(le, plane) : 0;
977 }
978
979 return (ab_sm || le_sm) ? 1 : 0;
980 }
981
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0, bs1: the two block dimensions; only their sum is used.
// delta:    angle offset; only its magnitude is used.
// type:     0 selects the first threshold set, any other value the second.
//
// Larger blocks and larger angle magnitudes give stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int mag = abs(delta);
  const int size_sum = bs0 + bs1;

  if (type != 0) {
    if (size_sum <= 8) return (mag >= 64) ? 2 : (mag >= 40) ? 1 : 0;
    if (size_sum <= 16) return (mag >= 48) ? 2 : (mag >= 20) ? 1 : 0;
    if (size_sum <= 24) return (mag >= 4) ? 3 : 0;
    return (mag >= 1) ? 3 : 0;
  }

  if (size_sum <= 8) return (mag >= 56) ? 1 : 0;
  // Sums in (8, 12] and (12, 16] share the same threshold.
  if (size_sum <= 16) return (mag >= 40) ? 1 : 0;
  if (size_sum <= 24) {
    return (mag >= 32) ? 3 : (mag >= 16) ? 2 : (mag >= 8) ? 1 : 0;
  }
  if (size_sum <= 32) {
    return (mag >= 32) ? 3 : (mag >= 4) ? 2 : (mag >= 1) ? 1 : 0;
  }
  return (mag >= 1) ? 3 : 0;
}
1020
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1021 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1022 if (!strength) return;
1023
1024 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1025 { 0, 5, 6, 5, 0 },
1026 { 2, 4, 4, 4, 2 } };
1027 const int filt = strength - 1;
1028 uint8_t edge[129];
1029
1030 memcpy(edge, p, sz * sizeof(*p));
1031 for (int i = 1; i < sz; i++) {
1032 int s = 0;
1033 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1034 int k = i - 2 + j;
1035 k = (k < 0) ? 0 : k;
1036 k = (k > sz - 1) ? sz - 1 : k;
1037 s += edge[k] * kernel[filt][j];
1038 }
1039 s = (s + 8) >> 4;
1040 p[i] = s;
1041 }
1042 }
1043
// Smooths the top-left corner sample with a 3-tap filter (weights 5, 6, 5 and
// a rounded shift by 4) over left[0], the corner itself and above[0]. The
// result is stored in both copies of the corner: p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int weighted_sum = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const int corner = (weighted_sum + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1053
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1054 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1055 if (!strength) return;
1056
1057 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1058 { 0, 5, 6, 5, 0 },
1059 { 2, 4, 4, 4, 2 } };
1060 const int filt = strength - 1;
1061 uint16_t edge[129];
1062
1063 memcpy(edge, p, sz * sizeof(*p));
1064 for (int i = 1; i < sz; i++) {
1065 int s = 0;
1066 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1067 int k = i - 2 + j;
1068 k = (k < 0) ? 0 : k;
1069 k = (k > sz - 1) ? sz - 1 : k;
1070 s += edge[k] * kernel[filt][j];
1071 }
1072 s = (s + 8) >> 4;
1073 p[i] = s;
1074 }
1075 }
1076
1077 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth corner smoothing: a 3-tap filter (weights 5, 6, 5 and a
// rounded shift by 4) over left[0], the corner itself and above[0]. The
// result is stored in both copies of the corner: p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int weighted_sum = 5 * p_left[0] + 6 * p_above[-1] + 5 * p_above[0];
  const int corner = (weighted_sum + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1087 #endif
1088
av1_upsample_intra_edge_c(uint8_t * p,int sz)1089 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1090 // interpolate half-sample positions
1091 assert(sz <= MAX_UPSAMPLE_SZ);
1092
1093 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1094 // copy p[-1..(sz-1)] and extend first and last samples
1095 in[0] = p[-1];
1096 in[1] = p[-1];
1097 for (int i = 0; i < sz; i++) {
1098 in[i + 2] = p[i];
1099 }
1100 in[sz + 2] = p[sz - 1];
1101
1102 // interpolate half-sample edge positions
1103 p[-2] = in[0];
1104 for (int i = 0; i < sz; i++) {
1105 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1106 s = clip_pixel((s + 8) >> 4);
1107 p[2 * i - 1] = s;
1108 p[2 * i] = in[i + 2];
1109 }
1110 }
1111
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1112 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1113 // interpolate half-sample positions
1114 assert(sz <= MAX_UPSAMPLE_SZ);
1115
1116 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1117 // copy p[-1..(sz-1)] and extend first and last samples
1118 in[0] = p[-1];
1119 in[1] = p[-1];
1120 for (int i = 0; i < sz; i++) {
1121 in[i + 2] = p[i];
1122 }
1123 in[sz + 2] = p[sz - 1];
1124
1125 // interpolate half-sample edge positions
1126 p[-2] = in[0];
1127 for (int i = 0; i < sz; i++) {
1128 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1129 s = (s + 8) >> 4;
1130 s = clip_pixel_highbd(s, bd);
1131 p[2 * i - 1] = s;
1132 p[2 * i] = in[i + 2];
1133 }
1134 }
1135 #if CONFIG_AV1_HIGHBITDEPTH
// Builds the intra prediction for one transform block of a high-bitdepth
// (16-bit storage) buffer.
//
// Steps:
//   1. Decide which neighbouring edges (left, above, above-left) the mode
//      needs; directional modes decide this from the prediction angle.
//   2. If the only edge that is needed has no available pixels, fill the
//      block with a single value and return.
//   3. Copy the available neighbours into local left/above arrays, replicating
//      the last available sample (or a default value) over missing positions.
//   4. Dispatch to the filter-intra predictor, the directional predictor
//      (optionally after edge filtering / upsampling), or the DC / table
//      predictors.
//
// ref8 / dst8 are CONVERT_TO_SHORTPTR-style handles to uint16_t data.
// n_top_px, n_topright_px, n_left_px, n_bottomleft_px give how many
// neighbouring pixels are available in each direction (0 = none).
// filter_intra_mode == FILTER_INTRA_MODES means "filter-intra not used".
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  int i;
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edge arrays start 16 entries into their storage so that negative
  // indices (e.g. [-1] for the above-left sample) stay inside the buffers.
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  const uint16_t *above_ref = ref - ref_stride;
  const uint16_t *left_ref = ref - 1;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  // Mid-grey value for the current bit depth (128 at 8 bits).
  int base = 128 << (xd->bd - 8);

  // The default values if ref pixels are not available:
  // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  // Directional modes override the table-driven edge requirements based on
  // the final prediction angle.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always reads the left, above and above-left neighbours.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Early out: only one edge is needed and it has no available pixels. The
  // whole block becomes a flat fill taken from the other edge's first sample
  // if that one exists, otherwise from the default value.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : base + 1;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : base - 1;
    }
    for (i = 0; i < txhpx; ++i) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
    i = 0;
    if (n_left_px > 0) {
      // Gather the left column (one sample per reference row).
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Replicate the last gathered sample over the remaining positions.
      if (i < num_left_pixels_needed)
        aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left pixels: fall back to the first above sample or the default.
      if (n_top_px > 0) {
        aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        aom_memset16(left_col, base + 1, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        i += n_topright_px;
      }
      // Replicate the last copied sample over the remaining positions.
      if (i < num_top_pixels_needed)
        aom_memset16(&above_row[i], above_row[i - 1],
                     num_top_pixels_needed - i);
    } else {
      // No above pixels: fall back to the first left sample or the default.
      if (n_left_px > 0) {
        aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        aom_memset16(above_row, base - 1, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner sample, stored at index -1 of both edge arrays.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = base;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          // The filtered span starts at the corner sample when it is in use.
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC predictor variant is selected by which edges are available.
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1320 #endif // CONFIG_AV1_HIGHBITDEPTH
1321
// Builds the intra prediction for one transform block of an 8-bit buffer.
//
// Steps:
//   1. Decide which neighbouring edges (left, above, above-left) the mode
//      needs; directional modes decide this from the prediction angle.
//   2. If the only edge that is needed has no available pixels, fill the
//      block with a single value and return.
//   3. Copy the available neighbours into local left/above arrays, replicating
//      the last available sample (or a default value) over missing positions.
//   4. Dispatch to the filter-intra predictor, the directional predictor
//      (optionally after edge filtering / upsampling), or the DC / table
//      predictors.
//
// n_top_px, n_topright_px, n_left_px, n_bottomleft_px give how many
// neighbouring pixels are available in each direction (0 = none).
// filter_intra_mode == FILTER_INTRA_MODES means "filter-intra not used".
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  int i;
  const uint8_t *above_ref = ref - ref_stride;
  const uint8_t *left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edge arrays start 16 entries into their storage so that negative
  // indices (e.g. [-1] for the above-left sample) stay inside the buffers.
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;

  // The default values if ref pixels are not available:
  // 128 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  // Directional modes override the table-driven edge requirements based on
  // the final prediction angle.
  if (is_dr_mode) {
    p_angle = mode_to_angle_map[mode] + angle_delta;
    if (p_angle <= 90)
      need_above = 1, need_left = 0, need_above_left = 1;
    else if (p_angle < 180)
      need_above = 1, need_left = 1, need_above_left = 1;
    else
      need_above = 0, need_left = 1, need_above_left = 1;
  }
  // Filter-intra always reads the left, above and above-left neighbours.
  if (use_filter_intra) need_left = need_above = need_above_left = 1;

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Early out: only one edge is needed and it has no available pixels. The
  // whole block becomes a flat fill taken from the other edge's first sample
  // if that one exists, otherwise from the default value.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    int val;
    if (need_left) {
      val = (n_top_px > 0) ? above_ref[0] : 129;
    } else {
      val = (n_left_px > 0) ? left_ref[0] : 127;
    }
    for (i = 0; i < txhpx; ++i) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
    if (use_filter_intra) need_bottom = 0;
    if (is_dr_mode) need_bottom = p_angle > 180;
    // the avx2 dr_prediction_z2 may read at most 3 extra bytes,
    // due to the avx2 mask load is with dword granularity.
    // so we initialize 3 extra bytes to silence valgrind complain.
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 3);
    i = 0;
    if (n_left_px > 0) {
      // Gather the left column (one sample per reference row).
      for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
      if (need_bottom && n_bottomleft_px > 0) {
        assert(i == txhpx);
        for (; i < txhpx + n_bottomleft_px; i++)
          left_col[i] = left_ref[i * ref_stride];
      }
      // Replicate the last gathered sample over the remaining positions.
      if (i < num_left_pixels_needed)
        memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
    } else {
      // No left pixels: fall back to the first above sample or the default.
      if (n_top_px > 0) {
        memset(left_col, above_ref[0], num_left_pixels_needed);
      } else {
        memset(left_col, 129, num_left_pixels_needed);
      }
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
    if (use_filter_intra) need_right = 0;
    if (is_dr_mode) need_right = p_angle < 90;
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
    if (n_top_px > 0) {
      memcpy(above_row, above_ref, n_top_px);
      i = n_top_px;
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        i += n_topright_px;
      }
      // Replicate the last copied sample over the remaining positions.
      if (i < num_top_pixels_needed)
        memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
    } else {
      // No above pixels: fall back to the first left sample or the default.
      if (n_left_px > 0) {
        memset(above_row, left_ref[0], num_top_pixels_needed);
      } else {
        memset(above_row, 127, num_top_pixels_needed);
      }
    }
  }

  // Above-left corner sample, stored at index -1 of both edge arrays.
  if (need_above_left) {
    if (n_top_px > 0 && n_left_px > 0) {
      above_row[-1] = above_ref[-1];
    } else if (n_top_px > 0) {
      above_row[-1] = above_ref[0];
    } else if (n_left_px > 0) {
      above_row[-1] = left_ref[0];
    } else {
      above_row[-1] = 128;
    }
    left_col[-1] = above_row[-1];
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int need_right = p_angle < 90;
      const int need_bottom = p_angle > 180;
      const int filt_type = get_filt_type(xd, plane);
      // Edge smoothing is skipped for exactly vertical / horizontal angles.
      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          // The filtered span starts at the corner sample when it is in use.
          const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
          av1_filter_intra_edge(above_row - ab_le, n_px, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
          av1_filter_intra_edge(left_col - ab_le, n_px, strength);
        }
      }
      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        const int n_px = txwpx + (need_right ? txhpx : 0);
        av1_upsample_intra_edge(above_row, n_px);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        const int n_px = txhpx + (need_bottom ? txwpx : 0);
        av1_upsample_intra_edge(left_col, n_px);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC predictor variant is selected by which edges are available.
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1507
scale_chroma_bsize(BLOCK_SIZE bsize,int subsampling_x,int subsampling_y)1508 static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1509 int subsampling_y) {
1510 assert(subsampling_x >= 0 && subsampling_x < 2);
1511 assert(subsampling_y >= 0 && subsampling_y < 2);
1512 BLOCK_SIZE bs = bsize;
1513 switch (bsize) {
1514 case BLOCK_4X4:
1515 if (subsampling_x == 1 && subsampling_y == 1)
1516 bs = BLOCK_8X8;
1517 else if (subsampling_x == 1)
1518 bs = BLOCK_8X4;
1519 else if (subsampling_y == 1)
1520 bs = BLOCK_4X8;
1521 break;
1522 case BLOCK_4X8:
1523 if (subsampling_x == 1 && subsampling_y == 1)
1524 bs = BLOCK_8X8;
1525 else if (subsampling_x == 1)
1526 bs = BLOCK_8X8;
1527 else if (subsampling_y == 1)
1528 bs = BLOCK_4X8;
1529 break;
1530 case BLOCK_8X4:
1531 if (subsampling_x == 1 && subsampling_y == 1)
1532 bs = BLOCK_8X8;
1533 else if (subsampling_x == 1)
1534 bs = BLOCK_8X4;
1535 else if (subsampling_y == 1)
1536 bs = BLOCK_8X8;
1537 break;
1538 case BLOCK_4X16:
1539 if (subsampling_x == 1 && subsampling_y == 1)
1540 bs = BLOCK_8X16;
1541 else if (subsampling_x == 1)
1542 bs = BLOCK_8X16;
1543 else if (subsampling_y == 1)
1544 bs = BLOCK_4X16;
1545 break;
1546 case BLOCK_16X4:
1547 if (subsampling_x == 1 && subsampling_y == 1)
1548 bs = BLOCK_16X8;
1549 else if (subsampling_x == 1)
1550 bs = BLOCK_16X4;
1551 else if (subsampling_y == 1)
1552 bs = BLOCK_16X8;
1553 break;
1554 default: break;
1555 }
1556 return bs;
1557 }
1558
// Produces the intra prediction for one transform block.
//
// Palette blocks are rendered directly from the colour index map. For all
// other blocks this works out which neighbouring pixels (top, top-right,
// left, bottom-left) are available, taking frame and tile limits into
// account, and hands the work to the 8-bit or high-bitdepth edge builder.
//
// wpx, hpx:         plane block width / height in pixels.
// col_off, row_off: transform block offset inside the block, in MI units.
// ref / dst:        reconstructed reference and destination buffers.
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  // Pixel position of this transform block inside the plane block.
  const int x = col_off << MI_SIZE_LOG2;
  const int y = row_off << MI_SIZE_LOG2;

  if (use_palette) {
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
                               xd->color_index_map_offset[plane != 0];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const out16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const map_row = map + (row + y) * wpx + x;
        for (int col = 0; col < txwpx; ++col) {
          out16[row * dst_stride + col] = palette[map_row[col]];
        }
      }
    } else {
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const map_row = map + (row + y) * wpx + x;
        for (int col = 0; col < txwpx; ++col) {
          dst[row * dst_stride + col] = (uint8_t)palette[map_row[col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // Top/left neighbours exist inside the block, or when the block itself has
  // an available neighbour on that side.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);

  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
  const int xr_chr_offset = 0;
  const int yd_chr_offset = 0;

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr =
      (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx) - xr_chr_offset;
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd =
      (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx) - yd_chr_offset;

  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = (ss_x || ss_y)
                               ? scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y)
                               : mbmi->sb_type;

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

  // Number of usable neighbouring pixels on each side, limited by the
  // distance to the frame edge.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px, plane);
    return;
  }
#endif
  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                         angle_delta, filter_intra_mode, tx_size,
                         disable_edge_filter, n_top_px, n_topright_px,
                         n_left_px, n_bottomleft_px, plane);
}
1657
// Convenience entry point: gathers the prediction parameters of the current
// block (mode, palette use, filter-intra mode, angle delta) from xd->mi[0] and
// predicts the transform block at (blk_col, blk_row) of `plane` in place in
// the plane's destination buffer.
//
// Chroma blocks coded with UV_CFL_PRED additionally run the CFL prediction on
// top of the DC prediction, which may be served from / stored into the CFL DC
// prediction cache.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dst_stride = pd->dst.stride;
  uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
  const PREDICTION_MODE mode =
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  // Filter-intra applies to luma only; FILTER_INTRA_MODES means "off".
  const FILTER_INTRA_MODE filter_intra_mode =
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
          ? mbmi->filter_intra_mode_info.filter_intra_mode
          : FILTER_INTRA_MODES;
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;

  // Everything except chroma-from-luma is a plain prediction call.
  if (plane == AOM_PLANE_Y || mbmi->uv_mode != UV_CFL_PRED) {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif
  CFL_CTX *const cfl = &xd->cfl;
  CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    // Reuse the DC prediction computed earlier for this plane.
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1709
// Public initialization entry point for the intra predictors. Hands
// init_intra_predictors_internal to aom_once(); presumably this makes the
// initialization run only once no matter how often this function is called
// (see aom_ports/aom_once.h to confirm).
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1713