1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/aom_once.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "av1/common/reconintra.h"
24 #include "av1/common/onyxc_int.h"
25 #include "av1/common/cfl.h"
26
// Bit flags naming the neighbouring edge regions an intra predictor may read.
// They are OR-ed together per prediction mode in extend_modes[].
enum {
  NEED_LEFT = 0x02,        // bit 1
  NEED_ABOVE = 0x04,       // bit 2
  NEED_ABOVERIGHT = 0x08,  // bit 3
  NEED_ABOVELEFT = 0x10,   // bit 4
  NEED_BOTTOMLEFT = 0x20,  // bit 5
};

// Sizing constants whose names refer to the intra edge filter and edge
// upsampling; their users are not in this part of the file.
#define INTRA_EDGE_FILT 3
#define INTRA_EDGE_TAPS 5
#define MAX_UPSAMPLE_SZ 16
38
39 static const uint8_t extend_modes[INTRA_MODES] = {
40 NEED_ABOVE | NEED_LEFT, // DC
41 NEED_ABOVE, // V
42 NEED_LEFT, // H
43 NEED_ABOVE | NEED_ABOVERIGHT, // D45
44 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
45 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D113
46 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D157
47 NEED_LEFT | NEED_BOTTOMLEFT, // D203
48 NEED_ABOVE | NEED_ABOVERIGHT, // D67
49 NEED_LEFT | NEED_ABOVE, // SMOOTH
50 NEED_LEFT | NEED_ABOVE, // SMOOTH_V
51 NEED_LEFT | NEED_ABOVE, // SMOOTH_H
52 NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH
53 };
54
// Tables to store if the top-right reference pixels are available. The flags
// are represented with bits, packed into 8-bit integers. E.g., for the 32x32
// blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
// order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
// i.e. (table[10 / 8] >> (10 % 8)) & 1.
//       . . . .
//       . . . .
//       . . o .
//       . . . .
//
// The tables are pure lookup data and are only ever read through
// `const uint8_t *`, so they are declared const.
static const uint8_t has_tr_4x4[128] = {
  255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
  127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
};
static const uint8_t has_tr_4x8[64] = {
  255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
  119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
  127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
  119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
  119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
};
static const uint8_t has_tr_8x4[64] = {
  255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
  127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
};
static const uint8_t has_tr_8x8[32] = {
  255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
  255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
};
static const uint8_t has_tr_8x16[16] = {
  255, 255, 119, 119, 127, 127, 119, 119,
  255, 127, 119, 119, 127, 127, 119, 119,
};
static const uint8_t has_tr_16x8[16] = {
  255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
};
static const uint8_t has_tr_16x16[8] = {
  255, 85, 119, 85, 127, 85, 119, 85,
};
static const uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
static const uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
static const uint8_t has_tr_32x32[2] = { 95, 87 };
static const uint8_t has_tr_32x64[1] = { 127 };
static const uint8_t has_tr_64x32[1] = { 19 };
static const uint8_t has_tr_64x64[1] = { 7 };
static const uint8_t has_tr_64x128[1] = { 3 };
static const uint8_t has_tr_128x64[1] = { 1 };
static const uint8_t has_tr_128x128[1] = { 1 };
static const uint8_t has_tr_4x16[32] = {
  255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
  127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
  127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
};
static const uint8_t has_tr_16x4[32] = {
  255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
  127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
};
static const uint8_t has_tr_8x32[8] = {
  255, 255, 127, 127, 255, 127, 127, 127,
};
static const uint8_t has_tr_32x8[8] = {
  15, 0, 5, 0, 7, 0, 5, 0,
};
static const uint8_t has_tr_16x64[2] = { 255, 127 };
static const uint8_t has_tr_64x16[2] = { 3, 1 };
127
128 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
129 // 4X4
130 has_tr_4x4,
131 // 4X8, 8X4, 8X8
132 has_tr_4x8, has_tr_8x4, has_tr_8x8,
133 // 8X16, 16X8, 16X16
134 has_tr_8x16, has_tr_16x8, has_tr_16x16,
135 // 16X32, 32X16, 32X32
136 has_tr_16x32, has_tr_32x16, has_tr_32x32,
137 // 32X64, 64X32, 64X64
138 has_tr_32x64, has_tr_64x32, has_tr_64x64,
139 // 64x128, 128x64, 128x128
140 has_tr_64x128, has_tr_128x64, has_tr_128x128,
141 // 4x16, 16x4, 8x32
142 has_tr_4x16, has_tr_16x4, has_tr_8x32,
143 // 32x8, 16x64, 64x16
144 has_tr_32x8, has_tr_16x64, has_tr_64x16
145 };
146
// Top-right availability bit tables for square blocks visited in the coding
// order of PARTITION_VERT_A / PARTITION_VERT_B. Read-only lookup data, hence
// const.
static const uint8_t has_tr_vert_8x8[32] = {
  255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
  255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
};
static const uint8_t has_tr_vert_16x16[8] = {
  255, 0, 119, 0, 127, 0, 119, 0,
};
static const uint8_t has_tr_vert_32x32[2] = { 15, 7 };
static const uint8_t has_tr_vert_64x64[1] = { 3 };
156
157 // The _vert_* tables are like the ordinary tables above, but describe the
158 // order we visit square blocks when doing a PARTITION_VERT_A or
159 // PARTITION_VERT_B. This is the same order as normal except for on the last
160 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
161 // as a pair of squares, which means that these tables work correctly for both
162 // mixed vertical partition types.
163 //
164 // There are tables for each of the square sizes. Vertical rectangles (like
165 // BLOCK_16X32) use their respective "non-vert" table
166 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
167 // 4X4
168 NULL,
169 // 4X8, 8X4, 8X8
170 has_tr_4x8, NULL, has_tr_vert_8x8,
171 // 8X16, 16X8, 16X16
172 has_tr_8x16, NULL, has_tr_vert_16x16,
173 // 16X32, 32X16, 32X32
174 has_tr_16x32, NULL, has_tr_vert_32x32,
175 // 32X64, 64X32, 64X64
176 has_tr_32x64, NULL, has_tr_vert_64x64,
177 // 64x128, 128x64, 128x128
178 has_tr_64x128, NULL, has_tr_128x128
179 };
180
// Selects the top-right availability bit table for a block.
//
// For the mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) the
// table comes from has_tr_vert_tables[], which only covers bsize <
// BLOCK_SIZES; every other partition type uses has_tr_tables[]. The selected
// entry is asserted to be non-NULL before it is returned.
static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_tr_tables[bsize];
  } else {
    assert(bsize < BLOCK_SIZES);
    table = has_tr_vert_tables[bsize];
  }

  assert(table);
  return table;
}
194
// Returns 1 if the top-right reference pixels of a transform block can be
// used for prediction, and 0 otherwise.
//
//   cm                 supplies the superblock size (seq_params.sb_size).
//   bsize              size of the coding block containing the transform.
//   mi_row, mi_col     position of the coding block in mi units.
//   top_available,
//   right_available    if either is 0 the result is 0.
//   partition          partition type; selects the coding-order table.
//   txsz               transform size.
//   row_off, col_off   transform offset inside the block, in the same units
//                      as tx_size_wide_unit[].
//   ss_x, ss_y         subsampling shifts applied to block dimensions.
static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int top_available, int right_available,
                         PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                         int col_off, int ss_x, int ss_y) {
  if (!(top_available && right_available)) return 0;

  const int bw_unit = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
  const int top_right_count_unit = tx_size_wide_unit[txsz];
  // First column past the right edge of this transform block.
  const int tx_right_col = col_off + top_right_count_unit;

  if (row_off > 0) {
    // Not on the first row of the block: the top-right pixels lie inside the
    // block itself, so it is enough to check that there is room on the right.
    if (block_size_wide[bsize] <= block_size_wide[BLOCK_64X64]) {
      return tx_right_col < plane_bw_unit;
    }

    // Blocks wider than 64 are handled per 64-wide unit.
    const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
    const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;

    // Special case: for 128x128 blocks, the transform unit whose top-right
    // corner is at the center of the block does in fact have pixels
    // available at its top-right corner.
    if (row_off == plane_bh_unit_64 && tx_right_col == plane_bw_unit_64) {
      return 1;
    }

    const int col_off_64 = col_off % plane_bw_unit_64;
    return col_off_64 + top_right_count_unit < plane_bw_unit_64;
  }

  // First row of the block. If the transform does not touch the block's right
  // edge, all top-right pixels are in the block above, which is already
  // available.
  if (tx_right_col < plane_bw_unit) return 1;

  const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
  const int bh_in_mi_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mi_mask = sb_mi_size - 1;
  const int blk_row_in_sb = (mi_row & sb_mi_mask) >> bh_in_mi_log2;
  const int blk_col_in_sb = (mi_col & sb_mi_mask) >> bw_in_mi_log2;

  // Top row of superblock: the top-right pixels are in the top and/or
  // top-right superblocks, both of which are already available.
  if (blk_row_in_sb == 0) return 1;

  // Rightmost column of superblock (and not the top row): the top-right
  // pixels fall in the right superblock, which is not available yet.
  if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) return 0;

  // General case (neither top row nor rightmost column): look up whether the
  // top-right block is coded before the current block. Flags are packed eight
  // per byte.
  const int this_blk_index =
      (blk_row_in_sb << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + blk_col_in_sb;
  const int byte_idx = this_blk_index / 8;
  const int bit_idx = this_blk_index % 8;
  const uint8_t *const has_tr_table = get_has_tr_table(partition, bsize);
  return (has_tr_table[byte_idx] >> bit_idx) & 1;
}
250
// Similar to the has_tr_* tables, but store if the bottom-left reference
// pixels are available. Flags are packed as bits, eight per byte, and read as
// (table[index / 8] >> (index % 8)) & 1.
//
// The tables are pure lookup data and are only ever read through
// `const uint8_t *`, so they are declared const.
static const uint8_t has_bl_4x4[128] = {
  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
  85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
  17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
  85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
  0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
  85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
};
static const uint8_t has_bl_4x8[64] = {
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
  16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
};
static const uint8_t has_bl_8x4[64] = {
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
  254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
};
static const uint8_t has_bl_8x8[32] = {
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
  84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
};
static const uint8_t has_bl_8x16[16] = {
  16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
};
static const uint8_t has_bl_16x8[16] = {
  254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
};
static const uint8_t has_bl_16x16[8] = {
  84, 16, 84, 0, 84, 16, 84, 0,
};
static const uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
static const uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
static const uint8_t has_bl_32x32[2] = { 4, 4 };
static const uint8_t has_bl_32x64[1] = { 0 };
static const uint8_t has_bl_64x32[1] = { 34 };
static const uint8_t has_bl_64x64[1] = { 0 };
static const uint8_t has_bl_64x128[1] = { 0 };
static const uint8_t has_bl_128x64[1] = { 0 };
static const uint8_t has_bl_128x128[1] = { 0 };
static const uint8_t has_bl_4x16[32] = {
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
  0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
};
static const uint8_t has_bl_16x4[32] = {
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
  254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
};
static const uint8_t has_bl_8x32[8] = {
  0, 1, 0, 0, 0, 1, 0, 0,
};
static const uint8_t has_bl_32x8[8] = {
  238, 78, 238, 14, 238, 78, 238, 14,
};
static const uint8_t has_bl_16x64[2] = { 0, 0 };
static const uint8_t has_bl_64x16[2] = { 42, 42 };
312
313 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
314 // 4X4
315 has_bl_4x4,
316 // 4X8, 8X4, 8X8
317 has_bl_4x8, has_bl_8x4, has_bl_8x8,
318 // 8X16, 16X8, 16X16
319 has_bl_8x16, has_bl_16x8, has_bl_16x16,
320 // 16X32, 32X16, 32X32
321 has_bl_16x32, has_bl_32x16, has_bl_32x32,
322 // 32X64, 64X32, 64X64
323 has_bl_32x64, has_bl_64x32, has_bl_64x64,
324 // 64x128, 128x64, 128x128
325 has_bl_64x128, has_bl_128x64, has_bl_128x128,
326 // 4x16, 16x4, 8x32
327 has_bl_4x16, has_bl_16x4, has_bl_8x32,
328 // 32x8, 16x64, 64x16
329 has_bl_32x8, has_bl_16x64, has_bl_64x16
330 };
331
// Bottom-left availability bit tables for square blocks visited in the coding
// order of PARTITION_VERT_A / PARTITION_VERT_B. Read-only lookup data, hence
// const.
static const uint8_t has_bl_vert_8x8[32] = {
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
  254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
};
static const uint8_t has_bl_vert_16x16[8] = {
  254, 16, 254, 0, 254, 16, 254, 0,
};
static const uint8_t has_bl_vert_32x32[2] = { 14, 14 };
static const uint8_t has_bl_vert_64x64[1] = { 2 };
341
342 // The _vert_* tables are like the ordinary tables above, but describe the
343 // order we visit square blocks when doing a PARTITION_VERT_A or
344 // PARTITION_VERT_B. This is the same order as normal except for on the last
345 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
346 // as a pair of squares, which means that these tables work correctly for both
347 // mixed vertical partition types.
348 //
349 // There are tables for each of the square sizes. Vertical rectangles (like
350 // BLOCK_16X32) use their respective "non-vert" table
351 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
352 // 4X4
353 NULL,
354 // 4X8, 8X4, 8X8
355 has_bl_4x8, NULL, has_bl_vert_8x8,
356 // 8X16, 16X8, 16X16
357 has_bl_8x16, NULL, has_bl_vert_16x16,
358 // 16X32, 32X16, 32X32
359 has_bl_16x32, NULL, has_bl_vert_32x32,
360 // 32X64, 64X32, 64X64
361 has_bl_32x64, NULL, has_bl_vert_64x64,
362 // 64x128, 128x64, 128x128
363 has_bl_64x128, NULL, has_bl_128x128
364 };
365
// Selects the bottom-left availability bit table for a block.
//
// For the mixed vertical partitions (PARTITION_VERT_A / PARTITION_VERT_B) the
// table comes from has_bl_vert_tables[], which only covers bsize <
// BLOCK_SIZES; every other partition type uses has_bl_tables[]. The selected
// entry is asserted to be non-NULL before it is returned.
static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
                                       BLOCK_SIZE bsize) {
  const int is_mixed_vert =
      (partition == PARTITION_VERT_A) || (partition == PARTITION_VERT_B);
  const uint8_t *table;

  if (!is_mixed_vert) {
    table = has_bl_tables[bsize];
  } else {
    assert(bsize < BLOCK_SIZES);
    table = has_bl_vert_tables[bsize];
  }

  assert(table);
  return table;
}
379
// Returns 1 if the bottom-left reference pixels of a transform block can be
// used for prediction, and 0 otherwise.
//
//   cm                 supplies the superblock size (seq_params.sb_size).
//   bsize              size of the coding block containing the transform.
//   mi_row, mi_col     position of the coding block in mi units.
//   bottom_available,
//   left_available     if either is 0 the result is 0.
//   partition          partition type; selects the coding-order table.
//   txsz               transform size.
//   row_off, col_off   transform offset inside the block, in the same units
//                      as tx_size_high_unit[].
//   ss_x, ss_y         subsampling shifts applied to block dimensions.
static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
                           int mi_col, int bottom_available, int left_available,
                           PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
                           int col_off, int ss_x, int ss_y) {
  if (!(bottom_available && left_available)) return 0;

  if (col_off > 0) {
    // Special case for 128x* blocks, when col_off is half the block width.
    // This is needed because 128x* superblocks are divided into 64x* blocks
    // in raster order.
    if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
      const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
      if (col_off % plane_bw_unit_64 == 0) {
        // We are at the left edge of the top-right or bottom-right 64x*
        // block. Check if all bottom-left pixels are in the left 64x* block
        // (which is already coded).
        const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
        const int row_off_64 = row_off % plane_bh_unit_64;
        const int plane_bh_unit =
            AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
        return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
      }
    }
    // Otherwise the bottom-left pixels are in the bottom-left block, which is
    // not available.
    return 0;
  }

  // From here on the transform sits in the first column of the block.
  const int bh_unit = block_size_high[bsize] >> tx_size_high_log2[0];
  const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
  const int bottom_left_count_unit = tx_size_high_unit[txsz];

  // All bottom-left pixels are in the left block, which is already available.
  if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;

  const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
  const int bh_in_mi_log2 = mi_size_high_log2[bsize];
  const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
  const int sb_mi_mask = sb_mi_size - 1;
  const int blk_row_in_sb = (mi_row & sb_mi_mask) >> bh_in_mi_log2;
  const int blk_col_in_sb = (mi_col & sb_mi_mask) >> bw_in_mi_log2;

  // Leftmost column of superblock: the bottom-left pixels may be in the left
  // and/or bottom-left superblocks. Only the left superblock is available, so
  // check if all required pixels fall in that superblock.
  if (blk_col_in_sb == 0) {
    const int blk_start_row_off =
        (blk_row_in_sb
         << (bh_in_mi_log2 + MI_SIZE_LOG2 - tx_size_wide_log2[0])) >>
        ss_y;
    const int sb_height_unit = sb_mi_size >> ss_y;
    return blk_start_row_off + row_off + bottom_left_count_unit <
           sb_height_unit;
  }

  // Bottom row of superblock (and not the leftmost column): the bottom-left
  // pixels fall in the bottom superblock, which is not available yet.
  if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;

  // General case (neither leftmost column nor bottom row): look up whether
  // the bottom-left block is coded before the current block. Flags are packed
  // eight per byte.
  const int this_blk_index =
      (blk_row_in_sb << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) + blk_col_in_sb;
  const int byte_idx = this_blk_index / 8;
  const int bit_idx = this_blk_index % 8;
  const uint8_t *const has_bl_table = get_has_bl_table(partition, bsize);
  return (has_bl_table[byte_idx] >> bit_idx) & 1;
}
449
450 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451 const uint8_t *above, const uint8_t *left);
452
453 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455
456 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
457 const uint16_t *above, const uint16_t *left,
458 int bd);
459 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
460 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
461
init_intra_predictors_internal(void)462 static void init_intra_predictors_internal(void) {
463 assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
464
465 #define INIT_RECTANGULAR(p, type) \
466 p[TX_4X8] = aom_##type##_predictor_4x8; \
467 p[TX_8X4] = aom_##type##_predictor_8x4; \
468 p[TX_8X16] = aom_##type##_predictor_8x16; \
469 p[TX_16X8] = aom_##type##_predictor_16x8; \
470 p[TX_16X32] = aom_##type##_predictor_16x32; \
471 p[TX_32X16] = aom_##type##_predictor_32x16; \
472 p[TX_32X64] = aom_##type##_predictor_32x64; \
473 p[TX_64X32] = aom_##type##_predictor_64x32; \
474 p[TX_4X16] = aom_##type##_predictor_4x16; \
475 p[TX_16X4] = aom_##type##_predictor_16x4; \
476 p[TX_8X32] = aom_##type##_predictor_8x32; \
477 p[TX_32X8] = aom_##type##_predictor_32x8; \
478 p[TX_16X64] = aom_##type##_predictor_16x64; \
479 p[TX_64X16] = aom_##type##_predictor_64x16;
480
481 #define INIT_NO_4X4(p, type) \
482 p[TX_8X8] = aom_##type##_predictor_8x8; \
483 p[TX_16X16] = aom_##type##_predictor_16x16; \
484 p[TX_32X32] = aom_##type##_predictor_32x32; \
485 p[TX_64X64] = aom_##type##_predictor_64x64; \
486 INIT_RECTANGULAR(p, type)
487
488 #define INIT_ALL_SIZES(p, type) \
489 p[TX_4X4] = aom_##type##_predictor_4x4; \
490 INIT_NO_4X4(p, type)
491
492 INIT_ALL_SIZES(pred[V_PRED], v);
493 INIT_ALL_SIZES(pred[H_PRED], h);
494 INIT_ALL_SIZES(pred[PAETH_PRED], paeth);
495 INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth);
496 INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v);
497 INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h);
498 INIT_ALL_SIZES(dc_pred[0][0], dc_128);
499 INIT_ALL_SIZES(dc_pred[0][1], dc_top);
500 INIT_ALL_SIZES(dc_pred[1][0], dc_left);
501 INIT_ALL_SIZES(dc_pred[1][1], dc);
502
503 INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
504 INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
505 INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth);
506 INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth);
507 INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v);
508 INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h);
509 INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
510 INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
511 INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
512 INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
513 #undef intra_pred_allsizes
514 }
515
// Directional prediction, zone 1: 0 < angle < 90.
//
// Predicts a bw x bh block into dst using only the `above` edge. A position x
// starts at dx and advances by dx per row; its upper bits select the base
// sample in `above` and its low bits give the weight used to blend
// above[base] and above[base + 1]. Once the base index reaches max_base_x the
// output is filled with above[max_base_x]. `left` is unused and dy must be 1.
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int dx, int dy) {
  (void)left;
  (void)dy;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and every row below it start past the end of the edge: fill
      // them all with the last edge sample and stop.
      for (; r < bh; ++r) {
        memset(dst, above[max_base_x], bw * sizeof(dst[0]));
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
553
// Directional prediction, zone 2: 90 < angle < 180.
//
// Predicts a bw x bh block into dst. Each pixel is first projected onto the
// `above` edge; if that projection falls left of min_base_x, the pixel is
// projected onto the `left` edge instead. Either way the result is a weighted
// blend of two adjacent edge samples. Both edges are read at negative indices
// (down to -(1 << upsample)), so the buffers must be addressable there.
void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_above, int upsample_left, int dx,
                            int dy) {
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      const uint8_t *edge;
      int base;
      int shift;

      if (base_x >= min_base_x) {
        edge = above;
        base = base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        base = base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int val = edge[base] * (32 - shift) + edge[base + 1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
592
// Directional prediction, zone 3: 180 < angle < 270.
//
// Predicts a bw x bh block into dst using only the `left` edge, working one
// column at a time. A position y starts at dy and advances by dy per column;
// its upper bits select the base sample in `left` and its low bits give the
// weight used to blend left[base] and left[base + 1]. Once the base index
// reaches max_base_y the rest of the column is filled with left[max_base_y].
// `above` is unused and dx must be 1.
void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
                            const uint8_t *above, const uint8_t *left,
                            int upsample_left, int dx, int dy) {
  (void)above;
  (void)dx;

  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int r = 0;

    // Interpolate while the base sample is still inside the edge.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }

    // Pad whatever is left of the column with the last edge sample.
    for (; r < bh; ++r) {
      dst[r * stride + c] = left[max_base_y];
    }
  }
}
624
// Dispatches an 8-bit directional prediction by angle (0 < angle < 270).
//
// Angles of exactly 90 and 180 use the V_PRED / H_PRED entries of pred[];
// every other angle goes to the zone 1, 2 or 3 predictor, with dx / dy
// obtained from av1_get_dx() / av1_get_dy().
static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
                         const uint8_t *above, const uint8_t *left,
                         int upsample_above, int upsample_left, int angle) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred[V_PRED][tx_size](dst, stride, above, left);
    return;
  }
  if (angle == 180) {
    pred[H_PRED][tx_size](dst, stride, above, left);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
                         dy);
  } else if (angle > 90 && angle < 180) {
    av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
                         upsample_left, dx, dy);
  } else if (angle > 180 && angle < 270) {
    av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
                         dy);
  }
}
649
// Directional prediction, zone 1: 0 < angle < 90 (16-bit samples).
//
// Predicts a bw x bh block into dst using only the `above` edge. A position x
// starts at dx and advances by dx per row; its upper bits select the base
// sample in `above` and its low bits give the weight used to blend
// above[base] and above[base + 1]. Once the base index reaches max_base_x the
// output is filled with above[max_base_x]. `left` and `bd` are unused and dy
// must be 1.
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int dx, int dy, int bd) {
  (void)left;
  (void)dy;
  (void)bd;
  assert(dy == 1);
  assert(dx > 0);

  const int max_base_x = ((bw + bh) - 1) << upsample_above;
  const int frac_bits = 6 - upsample_above;
  const int base_inc = 1 << upsample_above;

  int x = dx;
  for (int r = 0; r < bh; ++r) {
    int base = x >> frac_bits;
    const int shift = ((x << upsample_above) & 0x3F) >> 1;

    if (base >= max_base_x) {
      // This row and every row below it start past the end of the edge: fill
      // them all with the last edge sample and stop.
      for (; r < bh; ++r) {
        aom_memset16(dst, above[max_base_x], bw);
        dst += stride;
      }
      return;
    }

    for (int c = 0; c < bw; ++c) {
      if (base < max_base_x) {
        const int val = above[base] * (32 - shift) + above[base + 1] * shift;
        dst[c] = ROUND_POWER_OF_TWO(val, 5);
      } else {
        dst[c] = above[max_base_x];
      }
      base += base_inc;
    }

    dst += stride;
    x += dx;
  }
}
689
// Directional prediction, zone 2: 90 < angle < 180 (16-bit samples).
//
// Predicts a bw x bh block into dst. Each pixel is first projected onto the
// `above` edge; if that projection falls left of min_base_x, the pixel is
// projected onto the `left` edge instead. Either way the result is a weighted
// blend of two adjacent edge samples. Both edges are read at negative indices
// (down to -(1 << upsample)), so the buffers must be addressable there.
// `bd` is unused.
void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_above,
                                   int upsample_left, int dx, int dy, int bd) {
  (void)bd;
  assert(dx > 0);
  assert(dy > 0);

  const int min_base_x = -(1 << upsample_above);
  const int min_base_y = -(1 << upsample_left);
  (void)min_base_y;  // Only referenced by the assert below.
  const int frac_bits_x = 6 - upsample_above;
  const int frac_bits_y = 6 - upsample_left;

  for (int r = 0; r < bh; ++r, dst += stride) {
    for (int c = 0; c < bw; ++c) {
      const int x = (c << 6) - (r + 1) * dx;
      const int base_x = x >> frac_bits_x;
      const uint16_t *edge;
      int base;
      int shift;

      if (base_x >= min_base_x) {
        edge = above;
        base = base_x;
        shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
      } else {
        const int y = (r << 6) - (c + 1) * dy;
        const int base_y = y >> frac_bits_y;
        assert(base_y >= min_base_y);
        edge = left;
        base = base_y;
        shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
      }

      const int val = edge[base] * (32 - shift) + edge[base + 1] * shift;
      dst[c] = ROUND_POWER_OF_TWO(val, 5);
    }
  }
}
729
// Directional prediction, zone 3: 180 < angle < 270 (16-bit samples).
//
// Predicts a bw x bh block into dst using only the `left` edge, working one
// column at a time. A position y starts at dy and advances by dy per column;
// its upper bits select the base sample in `left` and its low bits give the
// weight used to blend left[base] and left[base + 1]. Once the base index
// reaches max_base_y the rest of the column is filled with left[max_base_y].
// `above` and `bd` are unused and dx must be 1.
void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above,
                                   const uint16_t *left, int upsample_left,
                                   int dx, int dy, int bd) {
  (void)above;
  (void)dx;
  (void)bd;
  assert(dx == 1);
  assert(dy > 0);

  const int max_base_y = (bw + bh - 1) << upsample_left;
  const int frac_bits = 6 - upsample_left;
  const int base_inc = 1 << upsample_left;

  int y = dy;
  for (int c = 0; c < bw; ++c, y += dy) {
    int base = y >> frac_bits;
    const int shift = ((y << upsample_left) & 0x3F) >> 1;
    int r = 0;

    // Interpolate while the base sample is still inside the edge.
    for (; r < bh && base < max_base_y; ++r, base += base_inc) {
      const int val = left[base] * (32 - shift) + left[base + 1] * shift;
      dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
    }

    // Pad whatever is left of the column with the last edge sample.
    for (; r < bh; ++r) {
      dst[r * stride + c] = left[max_base_y];
    }
  }
}
762
// Dispatches a high-bitdepth directional prediction by angle
// (0 < angle < 270).
//
// Angles of exactly 90 and 180 use the V_PRED / H_PRED entries of
// pred_high[]; every other angle goes to the zone 1, 2 or 3 high-bitdepth
// predictor, with dx / dy obtained from av1_get_dx() / av1_get_dy().
static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                TX_SIZE tx_size, const uint16_t *above,
                                const uint16_t *left, int upsample_above,
                                int upsample_left, int angle, int bd) {
  const int dx = av1_get_dx(angle);
  const int dy = av1_get_dy(angle);
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  assert(angle > 0 && angle < 270);

  if (angle == 90) {
    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }
  if (angle == 180) {
    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
    return;
  }

  if (angle > 0 && angle < 90) {
    av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                upsample_above, dx, dy, bd);
  } else if (angle > 90 && angle < 180) {
    av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
                                upsample_above, upsample_left, dx, dy, bd);
  } else if (angle > 180 && angle < 270) {
    av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                dx, dy, bd);
  }
}
788
789 DECLARE_ALIGNED(16, const int8_t,
790 av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
791 {
792 { -6, 10, 0, 0, 0, 12, 0, 0 },
793 { -5, 2, 10, 0, 0, 9, 0, 0 },
794 { -3, 1, 1, 10, 0, 7, 0, 0 },
795 { -3, 1, 1, 2, 10, 5, 0, 0 },
796 { -4, 6, 0, 0, 0, 2, 12, 0 },
797 { -3, 2, 6, 0, 0, 2, 9, 0 },
798 { -3, 2, 2, 6, 0, 2, 7, 0 },
799 { -3, 1, 2, 2, 6, 3, 5, 0 },
800 },
801 {
802 { -10, 16, 0, 0, 0, 10, 0, 0 },
803 { -6, 0, 16, 0, 0, 6, 0, 0 },
804 { -4, 0, 0, 16, 0, 4, 0, 0 },
805 { -2, 0, 0, 0, 16, 2, 0, 0 },
806 { -10, 16, 0, 0, 0, 0, 10, 0 },
807 { -6, 0, 16, 0, 0, 0, 6, 0 },
808 { -4, 0, 0, 16, 0, 0, 4, 0 },
809 { -2, 0, 0, 0, 16, 0, 2, 0 },
810 },
811 {
812 { -8, 8, 0, 0, 0, 16, 0, 0 },
813 { -8, 0, 8, 0, 0, 16, 0, 0 },
814 { -8, 0, 0, 8, 0, 16, 0, 0 },
815 { -8, 0, 0, 0, 8, 16, 0, 0 },
816 { -4, 4, 0, 0, 0, 0, 16, 0 },
817 { -4, 0, 4, 0, 0, 0, 16, 0 },
818 { -4, 0, 0, 4, 0, 0, 16, 0 },
819 { -4, 0, 0, 0, 4, 0, 16, 0 },
820 },
821 {
822 { -2, 8, 0, 0, 0, 10, 0, 0 },
823 { -1, 3, 8, 0, 0, 6, 0, 0 },
824 { -1, 2, 3, 8, 0, 4, 0, 0 },
825 { 0, 1, 2, 3, 8, 2, 0, 0 },
826 { -1, 4, 0, 0, 0, 3, 10, 0 },
827 { -1, 3, 4, 0, 0, 4, 6, 0 },
828 { -1, 2, 3, 4, 0, 4, 4, 0 },
829 { -1, 2, 2, 3, 4, 3, 3, 0 },
830 },
831 {
832 { -12, 14, 0, 0, 0, 14, 0, 0 },
833 { -10, 0, 14, 0, 0, 12, 0, 0 },
834 { -9, 0, 0, 14, 0, 11, 0, 0 },
835 { -8, 0, 0, 0, 14, 10, 0, 0 },
836 { -10, 12, 0, 0, 0, 0, 14, 0 },
837 { -9, 1, 12, 0, 0, 0, 12, 0 },
838 { -8, 0, 0, 12, 0, 1, 11, 0 },
839 { -7, 0, 0, 1, 12, 1, 9, 0 },
840 },
841 };
842
// 8-bit filter-intra predictor.
//
// A (bh + 1) x (bw + 1) working grid is seeded with the above row (including
// the above-left sample at above[-1]) in row 0 and the left column in column
// 0. The block is then produced in 4x2 patches, top-to-bottom and
// left-to-right; each patch is computed from seven already-known neighbours
// (five from the row above, two from the column to the left) weighted by
// av1_filter_intra_taps[mode], so later patches build on earlier outputs.
// Finally the interior of the grid is copied to dst.
//
// dst/stride : destination block and its row pitch.
// tx_size    : transform size; width and height must not exceed 32.
// above      : above reference row; above[-1] must be readable.
// left       : left reference column (bh entries are read).
// mode       : first index into av1_filter_intra_taps.
void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
                                  TX_SIZE tx_size, const uint8_t *above,
                                  const uint8_t *left, int mode) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint8_t buffer[33][33];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));
  }

  // Seed the top row (with the corner) and the left column.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
  for (int row = 0; row < bh; ++row) {
    buffer[row + 1][0] = left[row];
  }

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours of the current 4x2 patch, captured before it is written.
      const int nbr[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) {
          sum += taps[t] * nbr[t];
        }
        // k enumerates the patch in raster order: 4 columns, 2 rows.
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS));
      }
    }
  }

  for (int row = 0; row < bh; ++row) {
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(uint8_t));
    dst += stride;
  }
}
890
// High-bitdepth filter-intra predictor.
//
// Same procedure as the 8-bit version in this file, on 16-bit samples: a
// (bh + 1) x (bw + 1) working grid is seeded with the above row (including
// above[-1]) and the left column, the block is generated in 4x2 patches from
// seven known neighbours weighted by av1_filter_intra_taps[mode], and each
// result is clipped with clip_pixel_highbd() for bit depth bd. The interior
// of the grid is then copied to dst.
//
// tx_size width and height must not exceed 32.
static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
                                          TX_SIZE tx_size,
                                          const uint16_t *above,
                                          const uint16_t *left, int mode,
                                          int bd) {
  const int bw = tx_size_wide[tx_size];
  const int bh = tx_size_high[tx_size];
  uint16_t buffer[33][33];

  assert(bw <= 32 && bh <= 32);

  // The initialization is just for silencing Jenkins static analysis warnings
  for (int row = 0; row <= bh; ++row) {
    memset(buffer[row], 0, (bw + 1) * sizeof(buffer[0][0]));
  }

  // Seed the top row (with the corner) and the left column.
  memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
  for (int row = 0; row < bh; ++row) {
    buffer[row + 1][0] = left[row];
  }

  for (int row = 1; row <= bh; row += 2) {
    for (int col = 1; col <= bw; col += 4) {
      // Neighbours of the current 4x2 patch, captured before it is written.
      const int nbr[7] = {
        buffer[row - 1][col - 1], buffer[row - 1][col],
        buffer[row - 1][col + 1], buffer[row - 1][col + 2],
        buffer[row - 1][col + 3], buffer[row][col - 1],
        buffer[row + 1][col - 1],
      };
      for (int k = 0; k < 8; ++k) {
        const int8_t *const taps = av1_filter_intra_taps[mode][k];
        int sum = 0;
        for (int t = 0; t < 7; ++t) {
          sum += taps[t] * nbr[t];
        }
        // k enumerates the patch in raster order: 4 columns, 2 rows.
        buffer[row + (k >> 2)][col + (k & 0x03)] = clip_pixel_highbd(
            ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_SCALE_BITS), bd);
      }
    }
  }

  for (int row = 0; row < bh; ++row) {
    memcpy(dst, &buffer[row + 1][1], bw * sizeof(dst[0]));
    dst += stride;
  }
}
941
is_smooth(const MB_MODE_INFO * mbmi,int plane)942 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
943 if (plane == 0) {
944 const PREDICTION_MODE mode = mbmi->mode;
945 return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
946 mode == SMOOTH_H_PRED);
947 } else {
948 // uv_mode is not set for inter blocks, so need to explicitly
949 // detect that case.
950 if (is_inter_block(mbmi)) return 0;
951
952 const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
953 return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
954 uv_mode == UV_SMOOTH_H_PRED);
955 }
956 }
957
get_filt_type(const MACROBLOCKD * xd,int plane)958 static int get_filt_type(const MACROBLOCKD *xd, int plane) {
959 int ab_sm, le_sm;
960
961 if (plane == 0) {
962 const MB_MODE_INFO *ab = xd->above_mbmi;
963 const MB_MODE_INFO *le = xd->left_mbmi;
964 ab_sm = ab ? is_smooth(ab, plane) : 0;
965 le_sm = le ? is_smooth(le, plane) : 0;
966 } else {
967 const MB_MODE_INFO *ab = xd->chroma_above_mbmi;
968 const MB_MODE_INFO *le = xd->chroma_left_mbmi;
969 ab_sm = ab ? is_smooth(ab, plane) : 0;
970 le_sm = le ? is_smooth(le, plane) : 0;
971 }
972
973 return (ab_sm || le_sm) ? 1 : 0;
974 }
975
// Selects the intra edge filter strength (0 = no filtering, up to 3).
//
// bs0, bs1 : the two block dimensions; only their sum is used.
// delta    : signed angle offset; only its magnitude is used.
// type     : 0 selects the first threshold set, any other value the second.
//
// Larger blocks and larger angle magnitudes yield stronger filtering.
static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
  const int d = abs(delta);
  const int blk_wh = bs0 + bs1;

  if (type == 0) {
    if (blk_wh <= 8) return (d >= 56) ? 1 : 0;
    // Sums of 9..12 and 13..16 share the same threshold.
    if (blk_wh <= 16) return (d >= 40) ? 1 : 0;
    if (blk_wh <= 24) {
      if (d >= 32) return 3;
      if (d >= 16) return 2;
      return (d >= 8) ? 1 : 0;
    }
    if (blk_wh <= 32) {
      if (d >= 32) return 3;
      if (d >= 4) return 2;
      return (d >= 1) ? 1 : 0;
    }
    return (d >= 1) ? 3 : 0;
  }

  if (blk_wh <= 8) {
    if (d >= 64) return 2;
    return (d >= 40) ? 1 : 0;
  }
  if (blk_wh <= 16) {
    if (d >= 48) return 2;
    return (d >= 20) ? 1 : 0;
  }
  if (blk_wh <= 24) return (d >= 4) ? 3 : 0;
  return (d >= 1) ? 3 : 0;
}
1014
av1_filter_intra_edge_c(uint8_t * p,int sz,int strength)1015 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1016 if (!strength) return;
1017
1018 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1019 { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1020 };
1021 const int filt = strength - 1;
1022 uint8_t edge[129];
1023
1024 memcpy(edge, p, sz * sizeof(*p));
1025 for (int i = 1; i < sz; i++) {
1026 int s = 0;
1027 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1028 int k = i - 2 + j;
1029 k = (k < 0) ? 0 : k;
1030 k = (k > sz - 1) ? sz - 1 : k;
1031 s += edge[k] * kernel[filt][j];
1032 }
1033 s = (s + 8) >> 4;
1034 p[i] = s;
1035 }
1036 }
1037
// Smooths the shared above-left corner sample with a [5 6 5] / 16 filter
// across the first left sample, the corner itself (p_above[-1]) and the first
// above sample. The result is stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
  const int side_weight = 5;
  const int center_weight = 6;

  const int weighted = (p_left[0] * side_weight) +
                       (p_above[-1] * center_weight) +
                       (p_above[0] * side_weight);
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1047
av1_filter_intra_edge_high_c(uint16_t * p,int sz,int strength)1048 void av1_filter_intra_edge_high_c(uint16_t *p, int sz, int strength) {
1049 if (!strength) return;
1050
1051 const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = {
1052 { 0, 4, 8, 4, 0 }, { 0, 5, 6, 5, 0 }, { 2, 4, 4, 4, 2 }
1053 };
1054 const int filt = strength - 1;
1055 uint16_t edge[129];
1056
1057 memcpy(edge, p, sz * sizeof(*p));
1058 for (int i = 1; i < sz; i++) {
1059 int s = 0;
1060 for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1061 int k = i - 2 + j;
1062 k = (k < 0) ? 0 : k;
1063 k = (k > sz - 1) ? sz - 1 : k;
1064 s += edge[k] * kernel[filt][j];
1065 }
1066 s = (s + 8) >> 4;
1067 p[i] = s;
1068 }
1069 }
1070
// 16-bit counterpart of the corner filter: smooths the shared above-left
// corner sample with a [5 6 5] / 16 filter across the first left sample, the
// corner itself (p_above[-1]) and the first above sample. The result is
// stored to both p_above[-1] and p_left[-1].
static void filter_intra_edge_corner_high(uint16_t *p_above, uint16_t *p_left) {
  const int side_weight = 5;
  const int center_weight = 6;

  const int weighted = (p_left[0] * side_weight) +
                       (p_above[-1] * center_weight) +
                       (p_above[0] * side_weight);
  const int corner = (weighted + 8) >> 4;

  p_above[-1] = corner;
  p_left[-1] = corner;
}
1080
av1_upsample_intra_edge_c(uint8_t * p,int sz)1081 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1082 // interpolate half-sample positions
1083 assert(sz <= MAX_UPSAMPLE_SZ);
1084
1085 uint8_t in[MAX_UPSAMPLE_SZ + 3];
1086 // copy p[-1..(sz-1)] and extend first and last samples
1087 in[0] = p[-1];
1088 in[1] = p[-1];
1089 for (int i = 0; i < sz; i++) {
1090 in[i + 2] = p[i];
1091 }
1092 in[sz + 2] = p[sz - 1];
1093
1094 // interpolate half-sample edge positions
1095 p[-2] = in[0];
1096 for (int i = 0; i < sz; i++) {
1097 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1098 s = clip_pixel((s + 8) >> 4);
1099 p[2 * i - 1] = s;
1100 p[2 * i] = in[i + 2];
1101 }
1102 }
1103
av1_upsample_intra_edge_high_c(uint16_t * p,int sz,int bd)1104 void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
1105 // interpolate half-sample positions
1106 assert(sz <= MAX_UPSAMPLE_SZ);
1107
1108 uint16_t in[MAX_UPSAMPLE_SZ + 3];
1109 // copy p[-1..(sz-1)] and extend first and last samples
1110 in[0] = p[-1];
1111 in[1] = p[-1];
1112 for (int i = 0; i < sz; i++) {
1113 in[i + 2] = p[i];
1114 }
1115 in[sz + 2] = p[sz - 1];
1116
1117 // interpolate half-sample edge positions
1118 p[-2] = in[0];
1119 for (int i = 0; i < sz; i++) {
1120 int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1121 s = (s + 8) >> 4;
1122 s = clip_pixel_highbd(s, bd);
1123 p[2 * i - 1] = s;
1124 p[2 * i] = in[i + 2];
1125 }
1126 }
1127
// Builds one high-bitdepth intra prediction block.
//
// Steps:
//   1. Decide which reference edges (left / above / above-left) the mode
//      needs.
//   2. If the only edge needed is entirely unavailable, flood the block with
//      a single value and return.
//   3. Gather the needed edges into local buffers, replicating the last
//      available sample (or a default derived from the bit depth) where
//      reference pixels are missing.
//   4. Hand over to the filter-intra predictor, the directional predictor
//      (after optional edge filtering / upsampling), or the table-driven
//      DC / generic predictors.
//
// ref8 / dst8 are converted with CONVERT_TO_SHORTPTR before use.
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px give the number of
// reference pixels actually available on each side.
static void build_intra_predictors_high(
    const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
    int dst_stride, PREDICTION_MODE mode, int angle_delta,
    FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
    int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
    int n_bottomleft_px, int plane) {
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  DECLARE_ALIGNED(16, uint16_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their buffers because negative indices
  // are written below (index -1 for the corner, -2 by the upsampler).
  uint16_t *const above_row = above_data + 16;
  uint16_t *const left_col = left_data + 16;
  const uint16_t *const above_ref = ref - ref_stride;
  const uint16_t *const left_ref = ref - 1;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  const int base = 128 << (xd->bd - 8);
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;

  // The default values if ref pixels are not available:
  // base-1 base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
  // base+1   A      B  ..     Y      Z
  // base+1   C      D  ..     W      X
  // base+1   E      F  ..     U      V
  // base+1   G      H  ..     S      T      T      T      T      T

  if (is_dr_mode) {
    // For directional modes the needed edges follow from the final angle:
    // up to 90 only above, from 180 only left, in between both.
    p_angle = mode_to_angle_map[mode] + angle_delta;
    need_above = (p_angle < 180);
    need_left = (p_angle > 90);
    need_above_left = 1;
  }
  if (use_filter_intra) {
    need_left = 1;
    need_above = 1;
    need_above_left = 1;
  }

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and it is completely missing: flat fill.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    const int val = need_left ? ((n_top_px > 0) ? above_ref[0] : base + 1)
                              : ((n_left_px > 0) ? left_ref[0] : base - 1);
    for (int row = 0; row < txhpx; ++row) {
      aom_memset16(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom;
    if (is_dr_mode) {
      need_bottom = p_angle > 180;
    } else if (use_filter_intra) {
      need_bottom = 0;
    } else {
      need_bottom = (extend_modes[mode] & NEED_BOTTOMLEFT) != 0;
    }
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);

    if (n_left_px > 0) {
      int filled = 0;
      for (; filled < n_left_px; ++filled) {
        left_col[filled] = left_ref[filled * ref_stride];
      }
      if (need_bottom && n_bottomleft_px > 0) {
        assert(filled == txhpx);
        for (; filled < txhpx + n_bottomleft_px; ++filled) {
          left_col[filled] = left_ref[filled * ref_stride];
        }
      }
      // Replicate the last real sample over whatever is still missing.
      if (filled < num_left_pixels_needed) {
        aom_memset16(&left_col[filled], left_col[filled - 1],
                     num_left_pixels_needed - filled);
      }
    } else {
      const int fill = (n_top_px > 0) ? above_ref[0] : base + 1;
      aom_memset16(left_col, fill, num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right;
    if (is_dr_mode) {
      need_right = p_angle < 90;
    } else if (use_filter_intra) {
      need_right = 0;
    } else {
      need_right = (extend_modes[mode] & NEED_ABOVERIGHT) != 0;
    }
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);

    if (n_top_px > 0) {
      int filled = n_top_px;
      memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx,
               n_topright_px * sizeof(above_ref[0]));
        filled += n_topright_px;
      }
      // Replicate the last real sample over whatever is still missing.
      if (filled < num_top_pixels_needed) {
        aom_memset16(&above_row[filled], above_row[filled - 1],
                     num_top_pixels_needed - filled);
      }
    } else {
      const int fill = (n_left_px > 0) ? left_ref[0] : base - 1;
      aom_memset16(above_row, fill, num_top_pixels_needed);
    }
  }

  if (need_above_left) {
    // Best available stand-in for the corner sample, mirrored into both
    // edge buffers.
    uint16_t corner;
    if (n_top_px > 0) {
      corner = (n_left_px > 0) ? above_ref[-1] : above_ref[0];
    } else {
      corner = (n_left_px > 0) ? left_ref[0] : base;
    }
    above_row[-1] = corner;
    left_col[-1] = corner;
  }

  if (use_filter_intra) {
    highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                                  filter_intra_mode, xd->bd);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int filt_type = get_filt_type(xd, plane);
      // Extra samples past the block's own extent that the angle reaches.
      const int above_extra = (p_angle < 90) ? txhpx : 0;
      const int left_extra = (p_angle > 180) ? txwpx : 0;

      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner_high(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          av1_filter_intra_edge_high(above_row - ab_le,
                                     n_top_px + ab_le + above_extra, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          av1_filter_intra_edge_high(left_col - ab_le,
                                     n_left_px + ab_le + left_extra, strength);
        }
      }

      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        av1_upsample_intra_edge_high(above_row, txwpx + above_extra, xd->bd);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        av1_upsample_intra_edge_high(left_col, txhpx + left_extra, xd->bd);
      }
    }
    highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                        upsample_above, upsample_left, p_angle, xd->bd);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is chosen by which edges actually have pixels.
    dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
        dst, dst_stride, above_row, left_col, xd->bd);
  } else {
    pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
  }
}
1312
// Builds one 8-bit intra prediction block.
//
// Steps:
//   1. Decide which reference edges (left / above / above-left) the mode
//      needs.
//   2. If the only edge needed is entirely unavailable, flood the block with
//      a single value and return.
//   3. Gather the needed edges into local buffers, replicating the last
//      available sample (or the defaults 127 / 129 / 128) where reference
//      pixels are missing.
//   4. Hand over to the filter-intra predictor, the directional predictor
//      (after optional edge filtering / upsampling), or the table-driven
//      DC / generic predictors.
//
// n_top_px, n_topright_px, n_left_px and n_bottomleft_px give the number of
// reference pixels actually available on each side.
static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
                                   int ref_stride, uint8_t *dst, int dst_stride,
                                   PREDICTION_MODE mode, int angle_delta,
                                   FILTER_INTRA_MODE filter_intra_mode,
                                   TX_SIZE tx_size, int disable_edge_filter,
                                   int n_top_px, int n_topright_px,
                                   int n_left_px, int n_bottomleft_px,
                                   int plane) {
  const uint8_t *const above_ref = ref - ref_stride;
  const uint8_t *const left_ref = ref - 1;
  DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
  DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
  // The edges start 16 entries into their buffers because negative indices
  // are written below (index -1 for the corner, -2 by the upsampler).
  uint8_t *const above_row = above_data + 16;
  uint8_t *const left_col = left_data + 16;
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int is_dr_mode = av1_is_directional_mode(mode);
  const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
  int need_left = extend_modes[mode] & NEED_LEFT;
  int need_above = extend_modes[mode] & NEED_ABOVE;
  int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
  int p_angle = 0;

  // The default values if ref pixels are not available:
  // 127 127 127 .. 127 127 127 127 127 127
  // 129  A   B  ..  Y   Z
  // 129  C   D  ..  W   X
  // 129  E   F  ..  U   V
  // 129  G   H  ..  S   T   T   T   T   T
  // ..

  if (is_dr_mode) {
    // For directional modes the needed edges follow from the final angle:
    // up to 90 only above, from 180 only left, in between both.
    p_angle = mode_to_angle_map[mode] + angle_delta;
    need_above = (p_angle < 180);
    need_left = (p_angle > 90);
    need_above_left = 1;
  }
  if (use_filter_intra) {
    need_left = 1;
    need_above = 1;
    need_above_left = 1;
  }

  assert(n_top_px >= 0);
  assert(n_topright_px >= 0);
  assert(n_left_px >= 0);
  assert(n_bottomleft_px >= 0);

  // Only one edge is needed and it is completely missing: flat fill.
  if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
    const int val = need_left ? ((n_top_px > 0) ? above_ref[0] : 129)
                              : ((n_left_px > 0) ? left_ref[0] : 127);
    for (int row = 0; row < txhpx; ++row) {
      memset(dst, val, txwpx);
      dst += dst_stride;
    }
    return;
  }

  // NEED_LEFT
  if (need_left) {
    int need_bottom;
    if (is_dr_mode) {
      need_bottom = p_angle > 180;
    } else if (use_filter_intra) {
      need_bottom = 0;
    } else {
      need_bottom = (extend_modes[mode] & NEED_BOTTOMLEFT) != 0;
    }
    const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);

    if (n_left_px > 0) {
      int filled = 0;
      for (; filled < n_left_px; ++filled) {
        left_col[filled] = left_ref[filled * ref_stride];
      }
      if (need_bottom && n_bottomleft_px > 0) {
        assert(filled == txhpx);
        for (; filled < txhpx + n_bottomleft_px; ++filled) {
          left_col[filled] = left_ref[filled * ref_stride];
        }
      }
      // Replicate the last real sample over whatever is still missing.
      if (filled < num_left_pixels_needed) {
        memset(&left_col[filled], left_col[filled - 1],
               num_left_pixels_needed - filled);
      }
    } else {
      const int fill = (n_top_px > 0) ? above_ref[0] : 129;
      memset(left_col, fill, num_left_pixels_needed);
    }
  }

  // NEED_ABOVE
  if (need_above) {
    int need_right;
    if (is_dr_mode) {
      need_right = p_angle < 90;
    } else if (use_filter_intra) {
      need_right = 0;
    } else {
      need_right = (extend_modes[mode] & NEED_ABOVERIGHT) != 0;
    }
    const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);

    if (n_top_px > 0) {
      int filled = n_top_px;
      memcpy(above_row, above_ref, n_top_px);
      if (need_right && n_topright_px > 0) {
        assert(n_top_px == txwpx);
        memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
        filled += n_topright_px;
      }
      // Replicate the last real sample over whatever is still missing.
      if (filled < num_top_pixels_needed) {
        memset(&above_row[filled], above_row[filled - 1],
               num_top_pixels_needed - filled);
      }
    } else {
      const int fill = (n_left_px > 0) ? left_ref[0] : 127;
      memset(above_row, fill, num_top_pixels_needed);
    }
  }

  if (need_above_left) {
    // Best available stand-in for the corner sample, mirrored into both
    // edge buffers.
    uint8_t corner;
    if (n_top_px > 0) {
      corner = (n_left_px > 0) ? above_ref[-1] : above_ref[0];
    } else {
      corner = (n_left_px > 0) ? left_ref[0] : 128;
    }
    above_row[-1] = corner;
    left_col[-1] = corner;
  }

  if (use_filter_intra) {
    av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
                               filter_intra_mode);
    return;
  }

  if (is_dr_mode) {
    int upsample_above = 0;
    int upsample_left = 0;
    if (!disable_edge_filter) {
      const int filt_type = get_filt_type(xd, plane);
      // Extra samples past the block's own extent that the angle reaches.
      const int above_extra = (p_angle < 90) ? txhpx : 0;
      const int left_extra = (p_angle > 180) ? txwpx : 0;

      if (p_angle != 90 && p_angle != 180) {
        const int ab_le = need_above_left ? 1 : 0;
        if (need_above && need_left && (txwpx + txhpx >= 24)) {
          filter_intra_edge_corner(above_row, left_col);
        }
        if (need_above && n_top_px > 0) {
          const int strength =
              intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
          av1_filter_intra_edge(above_row - ab_le,
                                n_top_px + ab_le + above_extra, strength);
        }
        if (need_left && n_left_px > 0) {
          const int strength = intra_edge_filter_strength(
              txhpx, txwpx, p_angle - 180, filt_type);
          av1_filter_intra_edge(left_col - ab_le,
                                n_left_px + ab_le + left_extra, strength);
        }
      }

      upsample_above =
          av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
      if (need_above && upsample_above) {
        av1_upsample_intra_edge(above_row, txwpx + above_extra);
      }
      upsample_left =
          av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
      if (need_left && upsample_left) {
        av1_upsample_intra_edge(left_col, txhpx + left_extra);
      }
    }
    dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
                 upsample_left, p_angle);
    return;
  }

  // predict
  if (mode == DC_PRED) {
    // The DC variant is chosen by which edges actually have pixels.
    dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
                                                  left_col);
  } else {
    pred[mode][tx_size](dst, dst_stride, above_row, left_col);
  }
}
1495
// Predicts one transform block of an intra-coded block.
//
// With use_palette set, the block is painted directly from the palette
// through the colour index map and nothing else is done. Otherwise the
// function works out how many reference pixels exist above, above-right,
// left and below-left of the transform block (frame edges, tile edges and
// coding order all limit this) and forwards to the 8-bit or high-bitdepth
// predictor builder.
//
// wpx, hpx         : width / height of the plane block in pixels.
// col_off, row_off : position of the transform block inside the plane block,
//                    converted to pixels via tx_size_wide_log2[0] /
//                    tx_size_high_log2[0].
// ref, dst         : reference and destination buffers with their strides.
void av1_predict_intra_block(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
    TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
    FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
    uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int txwpx = tx_size_wide[tx_size];
  const int txhpx = tx_size_high[tx_size];
  const int x = col_off << tx_size_wide_log2[0];
  const int y = row_off << tx_size_high_log2[0];

  if (use_palette) {
    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
                               xd->color_index_map_offset[plane != 0];
    const uint16_t *const palette =
        mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
    if (is_cur_buf_hbd(xd)) {
      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const map_row = map + (row + y) * wpx + x;
        uint16_t *const out = dst16 + row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          out[col] = palette[map_row[col]];
        }
      }
    } else {
      for (int row = 0; row < txhpx; ++row) {
        const uint8_t *const map_row = map + (row + y) * wpx + x;
        uint8_t *const out = dst + row * dst_stride;
        for (int col = 0; col < txwpx; ++col) {
          out[col] = (uint8_t)palette[map_row[col]];
        }
      }
    }
    return;
  }

  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_x = pd->subsampling_x;
  const int ss_y = pd->subsampling_y;
  const int txw = tx_size_wide_unit[tx_size];
  const int txh = tx_size_high_unit[tx_size];

  // Inside the block (non-zero offset) the neighbour is always there;
  // on the block's first row / column defer to the availability flags.
  const int have_top =
      row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
  const int have_left =
      col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);

  // Distance between the right edge of this prediction block to
  // the frame right edge
  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx);
  // Distance between the bottom edge of this prediction block to
  // the frame bottom edge
  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx);

  const int right_available =
      mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
  const int bottom_available =
      (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);

  const PARTITION_TYPE partition = mbmi->partition;

  // force 4x4 chroma component block size.
  const BLOCK_SIZE bsize = scale_chroma_bsize(mbmi->sb_type, ss_x, ss_y);

  const int have_top_right =
      has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
                    partition, tx_size, row_off, col_off, ss_x, ss_y);
  const int have_bottom_left =
      has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
                      partition, tx_size, row_off, col_off, ss_x, ss_y);

  const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

  // Available reference pixel counts, clamped against the frame boundary.
  const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
  const int n_topright_px = have_top_right ? AOMMIN(txwpx, xr) : 0;
  const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
  const int n_bottomleft_px = have_bottom_left ? AOMMIN(txhpx, yd) : 0;

  if (is_cur_buf_hbd(xd)) {
    build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
                                angle_delta, filter_intra_mode, tx_size,
                                disable_edge_filter, n_top_px, n_topright_px,
                                n_left_px, n_bottomleft_px, plane);
    return;
  }

  build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
                         angle_delta, filter_intra_mode, tx_size,
                         disable_edge_filter, n_top_px, n_topright_px,
                         n_left_px, n_bottomleft_px, plane);
}
1591
// Convenience entry point: predicts the transform block at (blk_col, blk_row)
// of the given plane, taking every prediction parameter from xd->mi[0] and
// predicting in place in the plane's destination buffer (which serves as
// both reference and output).
//
// For chroma blocks using UV_CFL_PRED the regular prediction only provides
// the DC component, which may be served from or stored into the CFL DC
// cache, before cfl_predict_block() is applied on top.
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    int plane, int blk_col, int blk_row,
                                    TX_SIZE tx_size) {
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dst_stride = pd->dst.stride;
  uint8_t *dst =
      &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
  const PREDICTION_MODE mode =
      (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
  const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
  // Filter intra only applies to the luma plane; every other case passes the
  // FILTER_INTRA_MODES sentinel.
  const FILTER_INTRA_MODE filter_intra_mode =
      (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
          ? mbmi->filter_intra_mode_info.filter_intra_mode
          : FILTER_INTRA_MODES;
  const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
  const int is_cfl = plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED;

  if (!is_cfl) {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    return;
  }

#if CONFIG_DEBUG
  assert(is_cfl_allowed(xd));
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      mbmi->sb_type, pd->subsampling_x, pd->subsampling_y);
  (void)plane_bsize;
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(blk_col == 0);
    assert(blk_row == 0);
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif
  CFL_CTX *const cfl = &xd->cfl;
  const CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
  if (cfl->dc_pred_is_cached[pred_plane]) {
    cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
  } else {
    av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
                            angle_delta, use_palette, filter_intra_mode, dst,
                            dst_stride, dst, dst_stride, blk_col, blk_row,
                            plane);
    if (cfl->use_dc_pred_cache) {
      cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
      cfl->dc_pred_is_cached[pred_plane] = 1;
    }
  }
  cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
}
1644
// Public initialisation hook for the intra predictors: hands
// init_intra_predictors_internal to aom_once().
// NOTE(review): presumably aom_once() guarantees the callback runs at most
// once per process, making repeated calls harmless - confirm against
// aom_ports/aom_once.h.
void av1_init_intra_predictors(void) {
  aom_once(init_intra_predictors_internal);
}
1648