1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_loopfilter.h"
21 #include "av1/common/onyxc_int.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24
25 static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
26 { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
27 { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
28 { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
29 };
30
31 static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
32 { 0, 1 }, { 2, 2 }, { 3, 3 }
33 };
34
35 enum { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } UENUM1BYTE(EDGE_DIR);
36
37 static const int mode_lf_lut[] = {
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
39 1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0)
40 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
41 };
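// In other words, mode_deltas[0] applies to GLOBALMV / GLOBAL_GLOBALMV inter
// blocks and mode_deltas[1] to all other inter modes; intra blocks only use
// the INTRA_FRAME ref delta (see get_filter_level() and
// av1_loop_filter_frame_init() below).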
42
43 // 256-bit masks (64x64 / 4x4) for the left transform size of the Y plane.
44 // We use 4 uint64_t values to represent the 256 bits.
45 // Each 1 represents a position where we should apply a loop filter
46 // across the left border of a 4x4 block boundary.
47 //
48 // In the case of TX_8X8, reading the low-order byte first, we end up with
49 // a mask that looks like this (-- and | are only used for a better view):
50 //
51 // 10101010|10101010
52 // 10101010|10101010
53 // 10101010|10101010
54 // 10101010|10101010
55 // 10101010|10101010
56 // 10101010|10101010
57 // 10101010|10101010
58 // 10101010|10101010
59 // -----------------
60 // 10101010|10101010
61 // 10101010|10101010
62 // 10101010|10101010
63 // 10101010|10101010
64 // 10101010|10101010
65 // 10101010|10101010
66 // 10101010|10101010
67 // 10101010|10101010
68 //
69 // A loopfilter should be applied to every other 4x4 horizontally.
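//
// As an illustrative sketch (assuming only the bit layout described above;
// the same mapping is implemented by get_index_shift() below), the left-mask
// bit of the 4x4 unit at (row, col) inside the 64x64 area, for some
// FilterMask named mask, could be tested as:
//
//   const int word = row >> 2;                 // 4 rows per uint64_t
//   const int shift = ((row & 3) << 4) + col;  // 16 4x4 units per row
//   const int is_set = (int)((mask.bits[word] >> shift) & 1);
//
// With this layout, every word of the 64X64/TX_8X8 left mask is
// 0x5555555555555555ULL (see left_mask_univariant_reordered below).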
70
71 // 256-bit masks (64x64 / 4x4) for the above transform size of the Y plane.
72 // We use 4 uint64_t values to represent the 256 bits.
73 // Each 1 represents a position where we should apply a loop filter
74 // across the top border of a 4x4 block boundary.
75 //
76 // In the case of TX_8X8, reading the low-order byte first, we end up with
77 // a mask that looks like this:
78 //
79 // 11111111|11111111
80 // 00000000|00000000
81 // 11111111|11111111
82 // 00000000|00000000
83 // 11111111|11111111
84 // 00000000|00000000
85 // 11111111|11111111
86 // 00000000|00000000
87 // -----------------
88 // 11111111|11111111
89 // 00000000|00000000
90 // 11111111|11111111
91 // 00000000|00000000
92 // 11111111|11111111
93 // 00000000|00000000
94 // 11111111|11111111
95 // 00000000|00000000
96 //
97 // A loopfilter should be applied to every other 4x4 vertically.
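//
// Reading the rows LSB-first with the same bit layout, rows 0 and 2 of each
// 4-row group contribute 0xffff and rows 1 and 3 contribute 0x0000, so every
// word of the 64X64/TX_8X8 above mask is 0x0000ffff0000ffffULL (see
// above_mask_univariant_reordered below).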
98
99 const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = {
100 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18
101 };
102
103 const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = {
104 -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13
105 };
106
107 const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = {
108 -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8
109 };
110
111 const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = { -1, -1, -1, -1, -1, -1,
112 -1, -1, -1, 0, 1, 2,
113 3, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1 };
115 const int mask_id_table_vert_border[BLOCK_SIZES_ALL] = { 0, 47, 49, 19, 51, 53,
116 33, 55, 57, 42, 59, 60,
117 46, -1, -1, -1, 61, 62,
118 63, 64, 65, 66 };
119
120 const FilterMask left_mask_univariant_reordered[67] = {
121 // TX_4X4
122 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
123 0x0000000000000000ULL } }, // block size 4X4, TX_4X4
124 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
125 0x0000000000000000ULL } }, // block size 4X8, TX_4X4
126 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
127 0x0000000000000000ULL } }, // block size 8X4, TX_4X4
128 { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
129 0x0000000000000000ULL } }, // block size 8X8, TX_4X4
130 { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
131 0x0000000000000000ULL } }, // block size 8X16, TX_4X4
132 { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
133 0x0000000000000000ULL } }, // block size 16X8, TX_4X4
134 { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
135 0x0000000000000000ULL } }, // block size 16X16, TX_4X4
136 { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
137 0x0000000000000000ULL } }, // block size 16X32, TX_4X4
138 { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
139 0x0000000000000000ULL } }, // block size 32X16, TX_4X4
140 { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
141 0x0000000000000000ULL } }, // block size 32X32, TX_4X4
142 { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
143 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
144 { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
145 0x0000000000000000ULL } }, // block size 64X32, TX_4X4
146 { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
147 0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
148 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
149 0x0000000000000000ULL } }, // block size 4X16, TX_4X4
150 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
151 0x0000000000000000ULL } }, // block size 16X4, TX_4X4
152 { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
153 0x0000000000000000ULL } }, // block size 8X32, TX_4X4
154 { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
155 0x0000000000000000ULL } }, // block size 32X8, TX_4X4
156 { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
157 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
158 { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
159 0x0000000000000000ULL } }, // block size 64X16, TX_4X4
160 // TX_8X8
161 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
162 0x0000000000000000ULL } }, // block size 8X8, TX_8X8
163 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
164 0x0000000000000000ULL } }, // block size 8X16, TX_8X8
165 { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
166 0x0000000000000000ULL } }, // block size 16X8, TX_8X8
167 { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
168 0x0000000000000000ULL } }, // block size 16X16, TX_8X8
169 { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL,
170 0x0000000000000000ULL } }, // block size 16X32, TX_8X8
171 { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
172 0x0000000000000000ULL } }, // block size 32X16, TX_8X8
173 { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0000000000000000ULL,
174 0x0000000000000000ULL } }, // block size 32X32, TX_8X8
175 { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL,
176 0x0055005500550055ULL } }, // block size 32X64, TX_8X8
177 { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL,
178 0x0000000000000000ULL } }, // block size 64X32, TX_8X8
179 { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL,
180 0x5555555555555555ULL } }, // block size 64X64, TX_8X8
181 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
182 0x0000000000000000ULL } }, // block size 8X32, TX_8X8
183 { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
184 0x0000000000000000ULL } }, // block size 32X8, TX_8X8
185 { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL,
186 0x0005000500050005ULL } }, // block size 16X64, TX_8X8
187 { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
188 0x0000000000000000ULL } }, // block size 64X16, TX_8X8
189 // TX_16X16
190 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
191 0x0000000000000000ULL } }, // block size 16X16, TX_16X16
192 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
193 0x0000000000000000ULL } }, // block size 16X32, TX_16X16
194 { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
195 0x0000000000000000ULL } }, // block size 32X16, TX_16X16
196 { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL,
197 0x0000000000000000ULL } }, // block size 32X32, TX_16X16
198 { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL,
199 0x0011001100110011ULL } }, // block size 32X64, TX_16X16
200 { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL,
201 0x0000000000000000ULL } }, // block size 64X32, TX_16X16
202 { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL,
203 0x1111111111111111ULL } }, // block size 64X64, TX_16X16
204 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
205 0x0001000100010001ULL } }, // block size 16X64, TX_16X16
206 { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
207 0x0000000000000000ULL } }, // block size 64X16, TX_16X16
208 // TX_32X32
209 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
210 0x0000000000000000ULL } }, // block size 32X32, TX_32X32
211 { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
212 0x0101010101010101ULL } }, // block size 32X64, TX_32X32
213 { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL,
214 0x0000000000000000ULL } }, // block size 64X32, TX_32X32
215 { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
216 0x0101010101010101ULL } }, // block size 64X64, TX_32X32
217 // TX_64X64
218 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
219 0x0001000100010001ULL } }, // block size 64X64, TX_64X64
220 // 2:1, 1:2 transform sizes.
221 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
222 0x0000000000000000ULL } }, // block size 4X8, TX_4X8
223 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
224 0x0000000000000000ULL } }, // block size 4X16, TX_4X8
225 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
226 0x0000000000000000ULL } }, // block size 8X4, TX_8X4
227 { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
228 0x0000000000000000ULL } }, // block size 16X4, TX_8X4
229 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
230 0x0000000000000000ULL } }, // block size 8X16, TX_8X16
231 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
232 0x0000000000000000ULL } }, // block size 8X32, TX_8X16
233 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
234 0x0000000000000000ULL } }, // block size 16X8, TX_16X8
235 { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
236 0x0000000000000000ULL } }, // block size 32X8, TX_16X8
237 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
238 0x0000000000000000ULL } }, // block size 16X32, TX_16X32
239 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
240 0x0001000100010001ULL } }, // block size 16X64, TX_16X32
241 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
242 0x0000000000000000ULL } }, // block size 32X16, TX_32X16
243 { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
244 0x0000000000000000ULL } }, // block size 64X16, TX_32X16
245 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
246 0x0001000100010001ULL } }, // block size 32X64, TX_32X64
247 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
248 0x0000000000000000ULL } }, // block size 64X32, TX_64X32
249 // 4:1, 1:4 transform sizes.
250 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
251 0x0000000000000000ULL } }, // block size 4X16, TX_4X16
252 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
253 0x0000000000000000ULL } }, // block size 16X4, TX_16X4
254 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
255 0x0000000000000000ULL } }, // block size 8X32, TX_8X32
256 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
257 0x0000000000000000ULL } }, // block size 32X8, TX_32X8
258 { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
259 0x0001000100010001ULL } }, // block size 16X64, TX_16X64
260 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
261 0x0000000000000000ULL } }, // block size 64X16, TX_64X16
262 };
263
264 const FilterMask above_mask_univariant_reordered[67] = {
265 // TX_4X4
266 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
267 0x0000000000000000ULL } }, // block size 4X4, TX_4X4
268 { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
269 0x0000000000000000ULL } }, // block size 4X8, TX_4X4
270 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
271 0x0000000000000000ULL } }, // block size 8X4, TX_4X4
272 { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
273 0x0000000000000000ULL } }, // block size 8X8, TX_4X4
274 { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
275 0x0000000000000000ULL } }, // block size 8X16, TX_4X4
276 { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
277 0x0000000000000000ULL } }, // block size 16X8, TX_4X4
278 { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
279 0x0000000000000000ULL } }, // block size 16X16, TX_4X4
280 { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
281 0x0000000000000000ULL } }, // block size 16X32, TX_4X4
282 { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
283 0x0000000000000000ULL } }, // block size 32X16, TX_4X4
284 { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
285 0x0000000000000000ULL } }, // block size 32X32, TX_4X4
286 { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
287 0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
288 { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
289 0x0000000000000000ULL } }, // block size 64X32, TX_4X4
290 { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
291 0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
292 { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
293 0x0000000000000000ULL } }, // block size 4X16, TX_4X4
294 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
295 0x0000000000000000ULL } }, // block size 16X4, TX_4X4
296 { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
297 0x0000000000000000ULL } }, // block size 8X32, TX_4X4
298 { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
299 0x0000000000000000ULL } }, // block size 32X8, TX_4X4
300 { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
301 0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
302 { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
303 0x0000000000000000ULL } }, // block size 64X16, TX_4X4
304 // TX_8X8
305 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
306 0x0000000000000000ULL } }, // block size 8X8, TX_8X8
307 { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
308 0x0000000000000000ULL } }, // block size 8X16, TX_8X8
309 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
310 0x0000000000000000ULL } }, // block size 16X8, TX_8X8
311 { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
312 0x0000000000000000ULL } }, // block size 16X16, TX_8X8
313 { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL,
314 0x0000000000000000ULL } }, // block size 16X32, TX_8X8
315 { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
316 0x0000000000000000ULL } }, // block size 32X16, TX_8X8
317 { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL,
318 0x0000000000000000ULL } }, // block size 32X32, TX_8X8
319 { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL,
320 0x000000ff000000ffULL } }, // block size 32X64, TX_8X8
321 { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL,
322 0x0000000000000000ULL } }, // block size 64X32, TX_8X8
323 { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
324 0x0000ffff0000ffffULL } }, // block size 64X64, TX_8X8
325 { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL,
326 0x0000000000000000ULL } }, // block size 8X32, TX_8X8
327 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
328 0x0000000000000000ULL } }, // block size 32X8, TX_8X8
329 { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL,
330 0x0000000f0000000fULL } }, // block size 16X64, TX_8X8
331 { { 0x0000ffff0000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
332 0x0000000000000000ULL } }, // block size 64X16, TX_8X8
333 // TX_16X16
334 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
335 0x0000000000000000ULL } }, // block size 16X16, TX_16X16
336 { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL,
337 0x0000000000000000ULL } }, // block size 16X32, TX_16X16
338 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
339 0x0000000000000000ULL } }, // block size 32X16, TX_16X16
340 { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL,
341 0x0000000000000000ULL } }, // block size 32X32, TX_16X16
342 { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL,
343 0x00000000000000ffULL } }, // block size 32X64, TX_16X16
344 { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL,
345 0x0000000000000000ULL } }, // block size 64X32, TX_16X16
346 { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL,
347 0x000000000000ffffULL } }, // block size 64X64, TX_16X16
348 { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL,
349 0x000000000000000fULL } }, // block size 16X64, TX_16X16
350 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
351 0x0000000000000000ULL } }, // block size 64X16, TX_16X16
352 // TX_32X32
353 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
354 0x0000000000000000ULL } }, // block size 32X32, TX_32X32
355 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL,
356 0x0000000000000000ULL } }, // block size 32X64, TX_32X32
357 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
358 0x0000000000000000ULL } }, // block size 64X32, TX_32X32
359 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL,
360 0x0000000000000000ULL } }, // block size 64X64, TX_32X32
361 // TX_64X64
362 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
363 0x0000000000000000ULL } }, // block size 64X64, TX_64X64
364 // 2:1, 1:2 transform sizes.
365 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
366 0x0000000000000000ULL } }, // block size 4X8, TX_4X8
367 { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
368 0x0000000000000000ULL } }, // block size 4X16, TX_4X8
369 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
370 0x0000000000000000ULL } }, // block size 8X4, TX_8X4
371 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
372 0x0000000000000000ULL } }, // block size 16X4, TX_8X4
373 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
374 0x0000000000000000ULL } }, // block size 8X16, TX_8X16
375 { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL,
376 0x0000000000000000ULL } }, // block size 8X32, TX_8X16
377 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
378 0x0000000000000000ULL } }, // block size 16X8, TX_16X8
379 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
380 0x0000000000000000ULL } }, // block size 32X8, TX_16X8
381 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
382 0x0000000000000000ULL } }, // block size 16X32, TX_16X32
383 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL,
384 0x0000000000000000ULL } }, // block size 16X64, TX_16X32
385 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
386 0x0000000000000000ULL } }, // block size 32X16, TX_32X16
387 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
388 0x0000000000000000ULL } }, // block size 64X16, TX_32X16
389 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
390 0x0000000000000000ULL } }, // block size 32X64, TX_32X64
391 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
392 0x0000000000000000ULL } }, // block size 64X32, TX_64X32
393 // 4:1, 1:4 transform sizes.
394 { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
395 0x0000000000000000ULL } }, // block size 4X16, TX_4X16
396 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
397 0x0000000000000000ULL } }, // block size 16X4, TX_16X4
398 { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
399 0x0000000000000000ULL } }, // block size 8X32, TX_8X32
400 { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
401 0x0000000000000000ULL } }, // block size 32X8, TX_32X8
402 { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
403 0x0000000000000000ULL } }, // block size 16X64, TX_16X64
404 { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
405 0x0000000000000000ULL } }, // block size 64X16, TX_64X16
406 };
407
408 #if LOOP_FILTER_BITMASK
409 LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row,
410 int mi_col) {
411 assert(cm->lf.lfm != NULL);
412 const int row = mi_row >> MIN_MIB_SIZE_LOG2; // 64x64
413 const int col = mi_col >> MIN_MIB_SIZE_LOG2;
414 return &cm->lf.lfm[row * cm->lf.lfm_stride + col];
415 }
416
417 typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
418 const uint8_t *limit, const uint8_t *thresh);
419
420 typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
421 const uint8_t *limit0, const uint8_t *thresh0,
422 const uint8_t *blimit1, const uint8_t *limit1,
423 const uint8_t *thresh1);
424
425 typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
426 const uint8_t *limit, const uint8_t *thresh, int bd);
427
428 typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
429 const uint8_t *limit0, const uint8_t *thresh0,
430 const uint8_t *blimit1, const uint8_t *limit1,
431 const uint8_t *thresh1, int bd);
432 #endif // LOOP_FILTER_BITMASK
433
434 static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
435 int lvl;
436
437 // For each possible loop filter level, fill out the limits.
438 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
439 // Set loop filter parameters that control sharpness.
440 int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
441
442 if (sharpness_lvl > 0) {
443 if (block_inside_limit > (9 - sharpness_lvl))
444 block_inside_limit = (9 - sharpness_lvl);
445 }
446
447 if (block_inside_limit < 1) block_inside_limit = 1;
448
449 memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
450 memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
451 SIMD_WIDTH);
452 }
453 }
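// Worked example (the values follow directly from the code above): with
// sharpness_lvl = 5 and lvl = 32, block_inside_limit = 32 >> (1 + 1) = 8,
// which is then capped at 9 - 5 = 4, so lfthr[32].lim is filled with 4 and
// lfthr[32].mblim with 2 * (32 + 2) + 4 = 72.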
454
455 uint8_t get_filter_level(const AV1_COMMON *cm, const loop_filter_info_n *lfi_n,
456 const int dir_idx, int plane,
457 const MB_MODE_INFO *mbmi) {
458 const int segment_id = mbmi->segment_id;
459 if (cm->delta_q_info.delta_lf_present_flag) {
460 int delta_lf;
461 if (cm->delta_q_info.delta_lf_multi) {
462 const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
463 delta_lf = mbmi->delta_lf[delta_lf_idx];
464 } else {
465 delta_lf = mbmi->delta_lf_from_base;
466 }
467 int base_level;
468 if (plane == 0)
469 base_level = cm->lf.filter_level[dir_idx];
470 else if (plane == 1)
471 base_level = cm->lf.filter_level_u;
472 else
473 base_level = cm->lf.filter_level_v;
474 int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
475 assert(plane >= 0 && plane <= 2);
476 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
477 if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
478 const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
479 lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
480 }
481
482 if (cm->lf.mode_ref_delta_enabled) {
483 const int scale = 1 << (lvl_seg >> 5);
484 lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
485 if (mbmi->ref_frame[0] > INTRA_FRAME)
486 lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
487 lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
488 }
489 return lvl_seg;
490 } else {
491 return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
492 [mode_lf_lut[mbmi->mode]];
493 }
494 }
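// Worked example (a sketch using only the logic above, for plane 0): with
// delta_lf_present_flag set, delta_lf = -2, filter_level[dir_idx] = 20 and
// no active segment feature, lvl_seg = clamp(-2 + 20, 0, MAX_LOOP_FILTER)
// = 18. If mode_ref_delta_enabled, scale = 1 << (18 >> 5) = 1 and the
// ref/mode deltas are added and clamped to [0, MAX_LOOP_FILTER].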
495
496 void av1_loop_filter_init(AV1_COMMON *cm) {
497 assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
498 loop_filter_info_n *lfi = &cm->lf_info;
499 struct loopfilter *lf = &cm->lf;
500 int lvl;
501
502 lf->combine_vert_horz_lf = 1;
503
504 // init limits for given sharpness
505 update_sharpness(lfi, lf->sharpness_level);
506
507 // init hev threshold const vectors
508 for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
509 memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
510 }
511
512 // Update the loop filter for the current frame.
513 // This should be called before loop_filter_rows();
514 // av1_loop_filter_frame() calls this function directly.
515 void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
516 int plane_end) {
517 int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE];
518 int plane;
519 int seg_id;
520 // The scale below is the multiplier for the ref/mode lf deltas:
521 // the multiplier is 1 when filter_lvl is between 0 and 31, and
522 // 2 when filter_lvl is between 32 and 63.
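// For example, lvl_seg = 40 gives scale = 1 << (40 >> 5) = 2, so each
// ref/mode delta is doubled before being added and clamped.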
523 loop_filter_info_n *const lfi = &cm->lf_info;
524 struct loopfilter *const lf = &cm->lf;
525 const struct segmentation *const seg = &cm->seg;
526
527 // update sharpness limits
528 update_sharpness(lfi, lf->sharpness_level);
529
530 filt_lvl[0] = cm->lf.filter_level[0];
531 filt_lvl[1] = cm->lf.filter_level_u;
532 filt_lvl[2] = cm->lf.filter_level_v;
533
534 filt_lvl_r[0] = cm->lf.filter_level[1];
535 filt_lvl_r[1] = cm->lf.filter_level_u;
536 filt_lvl_r[2] = cm->lf.filter_level_v;
537
538 assert(plane_start >= AOM_PLANE_Y);
539 assert(plane_end <= MAX_MB_PLANE);
540
541 for (plane = plane_start; plane < plane_end; plane++) {
542 if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0])
543 break;
544 else if (plane == 1 && !filt_lvl[1])
545 continue;
546 else if (plane == 2 && !filt_lvl[2])
547 continue;
548
549 for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
550 for (int dir = 0; dir < 2; ++dir) {
551 int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane];
552 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
553 if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
554 const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
555 lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
556 }
557
558 if (!lf->mode_ref_delta_enabled) {
559 // we could get rid of this if we assume that deltas are set to
560 // zero when not in use; encoder always uses deltas
561 memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
562 sizeof(lfi->lvl[plane][seg_id][dir]));
563 } else {
564 int ref, mode;
565 const int scale = 1 << (lvl_seg >> 5);
566 const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
567 lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
568 clamp(intra_lvl, 0, MAX_LOOP_FILTER);
569
570 for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
571 for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
572 const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
573 lf->mode_deltas[mode] * scale;
574 lfi->lvl[plane][seg_id][dir][ref][mode] =
575 clamp(inter_lvl, 0, MAX_LOOP_FILTER);
576 }
577 }
578 }
579 }
580 }
581 }
582 }
583
584 #if LOOP_FILTER_BITMASK
585 // A 64x64 block requires 256 bits, one for each of its 4x4 tx blocks.
586 // Every group of 4 rows is represented by one uint64_t mask. Hence,
587 // four uint64_t values, bitmask[4], represent the whole 64x64 block.
588 //
589 // Given a location (mi_col, mi_row), this function returns the index
590 // 0, 1, 2, or 3 to select which bitmask[] to use, and the shift value.
591 //
592 // mi_row is the row offset in mi units (4 pixels each), so
593 // (mi_row / 4) selects which uint64_t to use.
594 // Within that uint64_t, mi_row % 4 is the
595 // row offset, and each row holds 16 (= 1 << stride_log2) 4x4 units.
596 // Therefore, shift = (row << stride_log2) + mi_col.
597 int get_index_shift(int mi_col, int mi_row, int *index) {
598 // *index = mi_row >> 2;
599 // rows = mi_row % 4;
600 // stride_log2 = 4;
601 // shift = (rows << stride_log2) + mi_col;
602 *index = mi_row >> 2;
603 return ((mi_row & 3) << 4) | mi_col;
604 }
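// For example (the values follow the code above), the 4x4 unit at
// (mi_col = 5, mi_row = 6) lands in bits[1] because 6 >> 2 == 1, with
// shift ((6 & 3) << 4) | 5 == 37, so its mask bit can be tested with
// (bits[1] >> 37) & 1.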
605
606 static void check_mask(const FilterMask *lfm) {
607 #ifndef NDEBUG
608 for (int i = 0; i < 4; ++i) {
609 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_8X8].bits[i]));
610 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_16X16].bits[i]));
611 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_32X32].bits[i]));
612 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_64X64].bits[i]));
613 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_16X16].bits[i]));
614 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_32X32].bits[i]));
615 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_64X64].bits[i]));
616 assert(!(lfm[TX_16X16].bits[i] & lfm[TX_32X32].bits[i]));
617 assert(!(lfm[TX_16X16].bits[i] & lfm[TX_64X64].bits[i]));
618 assert(!(lfm[TX_32X32].bits[i] & lfm[TX_64X64].bits[i]));
619 }
620 #else
621 (void)lfm;
622 #endif
623 }
624
625 static void check_loop_filter_masks(const LoopFilterMask *lfm, int plane) {
626 if (plane == 0) {
627 // Assert if we try to apply 2 different loop filters at the same
628 // position.
629 check_mask(lfm->left_y);
630 check_mask(lfm->above_y);
631 } else if (plane == 1) {
632 check_mask(lfm->left_u);
633 check_mask(lfm->above_u);
634 } else {
635 check_mask(lfm->left_v);
636 check_mask(lfm->above_v);
637 }
638 }
639
640 static void update_masks(EDGE_DIR dir, int plane, uint64_t *mask,
641 TX_SIZE sqr_tx_size, LoopFilterMask *lfm) {
642 if (dir == VERT_EDGE) {
643 switch (plane) {
644 case 0:
645 for (int i = 0; i < 4; ++i) lfm->left_y[sqr_tx_size].bits[i] |= mask[i];
646 break;
647 case 1:
648 for (int i = 0; i < 4; ++i) lfm->left_u[sqr_tx_size].bits[i] |= mask[i];
649 break;
650 case 2:
651 for (int i = 0; i < 4; ++i) lfm->left_v[sqr_tx_size].bits[i] |= mask[i];
652 break;
653 default: assert(plane <= 2);
654 }
655 } else {
656 switch (plane) {
657 case 0:
658 for (int i = 0; i < 4; ++i)
659 lfm->above_y[sqr_tx_size].bits[i] |= mask[i];
660 break;
661 case 1:
662 for (int i = 0; i < 4; ++i)
663 lfm->above_u[sqr_tx_size].bits[i] |= mask[i];
664 break;
665 case 2:
666 for (int i = 0; i < 4; ++i)
667 lfm->above_v[sqr_tx_size].bits[i] |= mask[i];
668 break;
669 default: assert(plane <= 2);
670 }
671 }
672 }
673
674 static int is_frame_boundary(AV1_COMMON *const cm, int plane, int mi_row,
675 int mi_col, int ssx, int ssy, EDGE_DIR dir) {
676 if (plane && (ssx || ssy)) {
677 if (ssx && ssy) { // format 420
678 if ((mi_row << MI_SIZE_LOG2) > cm->height ||
679 (mi_col << MI_SIZE_LOG2) > cm->width)
680 return 1;
681 } else if (ssx) { // format 422
682 if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
683 (mi_col << MI_SIZE_LOG2) > cm->width)
684 return 1;
685 }
686 } else {
687 if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
688 (mi_col << MI_SIZE_LOG2) >= cm->width)
689 return 1;
690 }
691
692 int row_or_col;
693 if (plane == 0) {
694 row_or_col = dir == VERT_EDGE ? mi_col : mi_row;
695 } else {
696 // A chroma sub8x8 block uses the bottom/right mi of the co-located 8x8 luma
697 // block. So if mi_col == 1, it is actually at the frame boundary.
698 if (dir == VERT_EDGE) {
699 row_or_col = ssx ? (mi_col & 0x0FFFFFFE) : mi_col;
700 } else {
701 row_or_col = ssy ? (mi_row & 0x0FFFFFFE) : mi_row;
702 }
703 }
704 return row_or_col == 0;
705 }
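// For example, with horizontal chroma subsampling a chroma block whose mi
// starts at mi_col == 1 has (mi_col & 0x0FFFFFFE) == 0, so its vertical edge
// is treated as the frame boundary and is not filtered.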
706
707 static void setup_masks(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
708 int ssx, int ssy, TX_SIZE tx_size) {
709 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
710 const int x = (mi_col << (MI_SIZE_LOG2 - ssx));
711 const int y = (mi_row << (MI_SIZE_LOG2 - ssy));
712 // decide whether current vertical/horizontal edge needs loop filtering
713 for (EDGE_DIR dir = VERT_EDGE; dir <= HORZ_EDGE; ++dir) {
714 // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
715 mi_row |= ssy;
716 mi_col |= ssx;
717
718 MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
719 const MB_MODE_INFO *const mbmi = mi[0];
720 const int curr_skip = mbmi->skip && is_inter_block(mbmi);
721 const BLOCK_SIZE bsize = mbmi->sb_type;
722 const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
723 const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
724 const uint8_t level = get_filter_level(cm, &cm->lf_info, dir, plane, mbmi);
725 const int prediction_masks = dir == VERT_EDGE
726 ? block_size_wide[plane_bsize] - 1
727 : block_size_high[plane_bsize] - 1;
728 const int is_coding_block_border =
729 dir == VERT_EDGE ? !(x & prediction_masks) : !(y & prediction_masks);
730
731 // TODO(chengchen): step can be optimized.
732 const int row_step = mi_size_high[TX_4X4] << ssy;
733 const int col_step = mi_size_wide[TX_4X4] << ssx;
734 const int mi_height =
735 dir == VERT_EDGE ? tx_size_high_unit[tx_size] << ssy : row_step;
736 const int mi_width =
737 dir == VERT_EDGE ? col_step : tx_size_wide_unit[tx_size] << ssx;
738
739 // assign filter levels
740 for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
741 for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
742 // do not filter frame boundary
743 // Note: when a chroma plane's size is half of the luma plane's,
744 // chroma-plane mi units correspond to even luma positions.
745 // If the frame size is not even, we still need to filter this chroma
746 // position. Therefore the boundary condition check needs to be
747 // separated into two cases.
748 if (plane && (ssx || ssy)) {
749 if (ssx && ssy) { // format 420
750 if ((r << MI_SIZE_LOG2) > cm->height ||
751 (c << MI_SIZE_LOG2) > cm->width)
752 continue;
753 } else if (ssx) { // format 422
754 if ((r << MI_SIZE_LOG2) >= cm->height ||
755 (c << MI_SIZE_LOG2) > cm->width)
756 continue;
757 }
758 } else {
759 if ((r << MI_SIZE_LOG2) >= cm->height ||
760 (c << MI_SIZE_LOG2) >= cm->width)
761 continue;
762 }
763
764 const int row = r % MI_SIZE_64X64;
765 const int col = c % MI_SIZE_64X64;
766 if (plane == 0) {
767 if (dir == VERT_EDGE)
768 lfm->lfl_y_ver[row][col] = level;
769 else
770 lfm->lfl_y_hor[row][col] = level;
771 } else if (plane == 1) {
772 lfm->lfl_u_ver[row][col] = level;
773 lfm->lfl_u_hor[row][col] = level;
774 } else {
775 lfm->lfl_v_ver[row][col] = level;
776 lfm->lfl_v_hor[row][col] = level;
777 }
778 }
779 }
780
781 for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
782 for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
783 // do not filter frame boundary
784 if (is_frame_boundary(cm, plane, r, c, ssx, ssy, dir)) continue;
785
786 uint64_t mask[4] = { 0 };
787 const int prev_row = dir == VERT_EDGE ? r : r - (1 << ssy);
788 const int prev_col = dir == VERT_EDGE ? c - (1 << ssx) : c;
789 MB_MODE_INFO **mi_prev =
790 cm->mi_grid_visible + prev_row * cm->mi_stride + prev_col;
791 const MB_MODE_INFO *const mbmi_prev = mi_prev[0];
792 const int prev_skip = mbmi_prev->skip && is_inter_block(mbmi_prev);
793 const uint8_t level_prev =
794 get_filter_level(cm, &cm->lf_info, dir, plane, mbmi_prev);
795 const int is_edge =
796 (level || level_prev) &&
797 (!curr_skip || !prev_skip || is_coding_block_border);
798
799 if (is_edge) {
800 const TX_SIZE prev_tx_size =
801 plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy)
802 : mbmi_prev->tx_size;
803 TX_SIZE min_tx_size = (dir == VERT_EDGE)
804 ? AOMMIN(txsize_horz_map[tx_size],
805 txsize_horz_map[prev_tx_size])
806 : AOMMIN(txsize_vert_map[tx_size],
807 txsize_vert_map[prev_tx_size]);
808 min_tx_size = AOMMIN(min_tx_size, TX_16X16);
809 assert(min_tx_size < TX_SIZES);
810 const int row = r % MI_SIZE_64X64;
811 const int col = c % MI_SIZE_64X64;
812 int index = 0;
813 const int shift = get_index_shift(col, row, &index);
814 assert(index < 4 && index >= 0);
815 mask[index] |= ((uint64_t)1 << shift);
816 // set mask on corresponding bit
817 update_masks(dir, plane, mask, min_tx_size, lfm);
818 }
819 }
820 }
821 }
822 }
823
824 static void setup_tx_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
825 int blk_row, int blk_col,
826 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
827 int plane, int ssx, int ssy) {
828 blk_row <<= ssy;
829 blk_col <<= ssx;
830 if (((mi_row + blk_row) << MI_SIZE_LOG2) >= cm->height ||
831 ((mi_col + blk_col) << MI_SIZE_LOG2) >= cm->width)
832 return;
833
834 // U/V plane, tx_size is always the largest size
835 if (plane) {
836 assert(tx_size_wide[tx_size] <= 32 && tx_size_high[tx_size] <= 32);
837 setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
838 tx_size);
839 return;
840 }
841
842 MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
843 const MB_MODE_INFO *const mbmi = mi[0];
844 // For the Y plane:
845 // If intra block, the tx size is uniform over the whole block.
846 // If inter block, the tx size follows inter_tx_size.
847 TX_SIZE plane_tx_size = tx_size;
848 const int is_inter = is_inter_block(mbmi);
849
850 if (plane == 0) {
851 if (is_inter) {
852 if (mbmi->skip) {
853 // TODO(chengchen): change av1_get_transform_size() to be consistent.
854 // plane_tx_size = get_max_rect_tx_size(plane_bsize);
855 plane_tx_size = mbmi->tx_size;
856 } else {
857 plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
858 plane_bsize, blk_row, blk_col)];
859 }
860 } else {
861 MB_MODE_INFO **mi_this = cm->mi_grid_visible +
862 (mi_row + blk_row) * cm->mi_stride + mi_col +
863 blk_col;
864 const MB_MODE_INFO *const mbmi_this = mi_this[0];
865 plane_tx_size = mbmi_this->tx_size;
866 }
867 }
868
869 assert(txsize_to_bsize[plane_tx_size] <= plane_bsize);
870
871 if (plane || plane_tx_size == tx_size) {
872 setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
873 tx_size);
874 } else {
875 const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
876 const int bsw = tx_size_wide_unit[sub_txs];
877 const int bsh = tx_size_high_unit[sub_txs];
878 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
879 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
880 const int offsetr = blk_row + row;
881 const int offsetc = blk_col + col;
882 setup_tx_block_mask(cm, mi_row, mi_col, offsetr, offsetc, plane_bsize,
883 sub_txs, plane, ssx, ssy);
884 }
885 }
886 }
887 }
888
889 static void setup_fix_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
890 int plane, int ssx, int ssy) {
891 MB_MODE_INFO **mi =
892 cm->mi_grid_visible + (mi_row | ssy) * cm->mi_stride + (mi_col | ssx);
893 const MB_MODE_INFO *const mbmi = mi[0];
894
895 const BLOCK_SIZE bsize = mbmi->sb_type;
896 const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
897 const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
898
899 const int block_width = mi_size_wide[plane_bsize];
900 const int block_height = mi_size_high[plane_bsize];
901
902 TX_SIZE max_txsize = max_txsize_rect_lookup[plane_bsize];
903 // The decoder is designed so that it can process 64x64 luma pixels at a
904 // time. If this is a chroma plane with subsampling and bsize corresponds to
905 // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
906 // mustn't be used for the subsampled plane (because it would be bigger than
907 // a 64x64 luma block) so we round down to TX_32X32.
908 if (plane && txsize_sqr_up_map[max_txsize] == TX_64X64) {
909 if (max_txsize == TX_16X64)
910 max_txsize = TX_16X32;
911 else if (max_txsize == TX_64X16)
912 max_txsize = TX_32X16;
913 else
914 max_txsize = TX_32X32;
915 }
916
917 const BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize];
918 const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
919 const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
920 const BLOCK_SIZE max_unit_bsize = ss_size_lookup[BLOCK_64X64][ssx][ssy];
921 int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
922 int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];
923
924 mu_blocks_wide = AOMMIN(block_width, mu_blocks_wide);
925 mu_blocks_high = AOMMIN(block_height, mu_blocks_high);
926
927 // Y: the largest tx_size is 64x64, while the superblock size can be 128x128.
928 // Here we ensure that setup_tx_block_mask processes at most a 64x64 block.
929 // U/V: the largest tx size is 32x32.
930 for (int idy = 0; idy < block_height; idy += mu_blocks_high) {
931 for (int idx = 0; idx < block_width; idx += mu_blocks_wide) {
932 const int unit_height = AOMMIN(mu_blocks_high + idy, block_height);
933 const int unit_width = AOMMIN(mu_blocks_wide + idx, block_width);
934 for (int blk_row = idy; blk_row < unit_height; blk_row += bh) {
935 for (int blk_col = idx; blk_col < unit_width; blk_col += bw) {
936 setup_tx_block_mask(cm, mi_row, mi_col, blk_row, blk_col, plane_bsize,
937 max_txsize, plane, ssx, ssy);
938 }
939 }
940 }
941 }
942 }
943
944 static void setup_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
945 BLOCK_SIZE bsize, int plane, int ssx, int ssy) {
946 if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
947 (mi_col << MI_SIZE_LOG2) >= cm->width)
948 return;
949
950 const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
951 const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
952 const int hbs = mi_size_wide[bsize] / 2;
953 const int quarter_step = mi_size_wide[bsize] / 4;
954 const int allow_sub8x8 = (ssx || ssy) ? bsize > BLOCK_8X8 : 1;
955 const int has_next_row =
956 (((mi_row + hbs) << MI_SIZE_LOG2) < cm->height) & allow_sub8x8;
957 const int has_next_col =
958 (((mi_col + hbs) << MI_SIZE_LOG2) < cm->width) & allow_sub8x8;
959 int i;
960
961 switch (partition) {
962 case PARTITION_NONE:
963 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
964 break;
965 case PARTITION_HORZ:
966 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
967 if (has_next_row)
968 setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
969 break;
970 case PARTITION_VERT:
971 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
972 if (has_next_col)
973 setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
974 break;
975 case PARTITION_SPLIT:
976 setup_block_mask(cm, mi_row, mi_col, subsize, plane, ssx, ssy);
977 if (has_next_col)
978 setup_block_mask(cm, mi_row, mi_col + hbs, subsize, plane, ssx, ssy);
979 if (has_next_row)
980 setup_block_mask(cm, mi_row + hbs, mi_col, subsize, plane, ssx, ssy);
981 if (has_next_col & has_next_row)
982 setup_block_mask(cm, mi_row + hbs, mi_col + hbs, subsize, plane, ssx,
983 ssy);
984 break;
985 case PARTITION_HORZ_A:
986 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
987 if (has_next_col)
988 setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
989 if (has_next_row)
990 setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
991 break;
992 case PARTITION_HORZ_B:
993 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
994 if (has_next_row)
995 setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
996 if (has_next_col & has_next_row)
997 setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
998 break;
999 case PARTITION_VERT_A:
1000 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
1001 if (has_next_row)
1002 setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
1003 if (has_next_col)
1004 setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
1005 break;
1006 case PARTITION_VERT_B:
1007 setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
1008 if (has_next_col)
1009 setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
1010 if (has_next_row)
1011 setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
1012 break;
1013 case PARTITION_HORZ_4:
1014 for (i = 0; i < 4; ++i) {
1015 int this_mi_row = mi_row + i * quarter_step;
1016 if (i > 0 && (this_mi_row << MI_SIZE_LOG2) >= cm->height) break;
1017 // for chroma, the odd locations are covered by the even ones, so skip them
1018 if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;
1019
1020 setup_fix_block_mask(cm, this_mi_row, mi_col, plane, ssx, ssy);
1021 }
1022 break;
1023 case PARTITION_VERT_4:
1024 for (i = 0; i < 4; ++i) {
1025 int this_mi_col = mi_col + i * quarter_step;
1026 if (i > 0 && this_mi_col >= cm->mi_cols) break;
1027 // for chroma, the odd locations are covered by the even ones, so skip them
1028 if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;
1029
1030 setup_fix_block_mask(cm, mi_row, this_mi_col, plane, ssx, ssy);
1031 }
1032 break;
1033 default: assert(0);
1034 }
1035 }
1036
1037 // TODO(chengchen): if lossless, there is no need to set up the mask. But
1038 // when segmentation is enabled, each segment can have its own lossless setting.
1039 void av1_setup_bitmask(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
1040 int subsampling_x, int subsampling_y, int row_end,
1041 int col_end) {
1042 const int num_64x64 = cm->seq_params.mib_size >> MIN_MIB_SIZE_LOG2;
1043 for (int y = 0; y < num_64x64; ++y) {
1044 for (int x = 0; x < num_64x64; ++x) {
1045 const int row = mi_row + y * MI_SIZE_64X64;
1046 const int col = mi_col + x * MI_SIZE_64X64;
1047 if (row >= row_end || col >= col_end) continue;
1048 if ((row << MI_SIZE_LOG2) >= cm->height ||
1049 (col << MI_SIZE_LOG2) >= cm->width)
1050 continue;
1051
1052 LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
1053 if (lfm == NULL) return;
1054
1055 // init mask to zero
1056 if (plane == 0) {
1057 av1_zero(lfm->left_y);
1058 av1_zero(lfm->above_y);
1059 av1_zero(lfm->lfl_y_ver);
1060 av1_zero(lfm->lfl_y_hor);
1061 } else if (plane == 1) {
1062 av1_zero(lfm->left_u);
1063 av1_zero(lfm->above_u);
1064 av1_zero(lfm->lfl_u_ver);
1065 av1_zero(lfm->lfl_u_hor);
1066 } else {
1067 av1_zero(lfm->left_v);
1068 av1_zero(lfm->above_v);
1069 av1_zero(lfm->lfl_v_ver);
1070 av1_zero(lfm->lfl_v_hor);
1071 }
1072 }
1073 }
1074
1075 // set up bitmask for each superblock
1076 setup_block_mask(cm, mi_row, mi_col, cm->seq_params.sb_size, plane,
1077 subsampling_x, subsampling_y);
1078
1079 for (int y = 0; y < num_64x64; ++y) {
1080 for (int x = 0; x < num_64x64; ++x) {
1081 const int row = mi_row + y * MI_SIZE_64X64;
1082 const int col = mi_col + x * MI_SIZE_64X64;
1083 if (row >= row_end || col >= col_end) continue;
1084 if ((row << MI_SIZE_LOG2) >= cm->height ||
1085 (col << MI_SIZE_LOG2) >= cm->width)
1086 continue;
1087
1088 LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
1089 if (lfm == NULL) return;
1090
1091 // check if the mask is valid
1092 check_loop_filter_masks(lfm, plane);
1093
1094 {
1095 // Fold 32x32 (Y/U/V) and 64x64 (Y only) into the 16x16 masks.
1096 // Even if the tx size is greater, we only apply the maximum filter
1097 // length, which is 16.
1098 if (plane == 0) {
1099 for (int j = 0; j < 4; ++j) {
1100 lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_32X32].bits[j];
1101 lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_64X64].bits[j];
1102 lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_32X32].bits[j];
1103 lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_64X64].bits[j];
1104
1105 // set 32x32 and 64x64 to 0
1106 lfm->left_y[TX_32X32].bits[j] = 0;
1107 lfm->left_y[TX_64X64].bits[j] = 0;
1108 lfm->above_y[TX_32X32].bits[j] = 0;
1109 lfm->above_y[TX_64X64].bits[j] = 0;
1110 }
1111 } else if (plane == 1) {
1112 for (int j = 0; j < 4; ++j) {
1113 lfm->left_u[TX_16X16].bits[j] |= lfm->left_u[TX_32X32].bits[j];
1114 lfm->above_u[TX_16X16].bits[j] |= lfm->above_u[TX_32X32].bits[j];
1115
1116 // set 32x32 to 0
1117 lfm->left_u[TX_32X32].bits[j] = 0;
1118 lfm->above_u[TX_32X32].bits[j] = 0;
1119 }
1120 } else {
1121 for (int j = 0; j < 4; ++j) {
1122 lfm->left_v[TX_16X16].bits[j] |= lfm->left_v[TX_32X32].bits[j];
1123 lfm->above_v[TX_16X16].bits[j] |= lfm->above_v[TX_32X32].bits[j];
1124
1125 // set 32x32 to 0
1126 lfm->left_v[TX_32X32].bits[j] = 0;
1127 lfm->above_v[TX_32X32].bits[j] = 0;
1128 }
1129 }
1130 }
1131
1132 // check if the mask is valid
1133 check_loop_filter_masks(lfm, plane);
1134 }
1135 }
1136 }
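// The per-64x64 bit masks and filter levels built above are intended to be
// consumed by the filter_selectively_*() helpers below, which walk the masks
// and invoke an aom_lpf_* kernel for each set bit.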
1137
1138 static void filter_selectively_vert_row2(
1139 int subsampling_factor, uint8_t *s, int pitch, int plane,
1140 uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
1141 uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
1142 const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
1143 uint64_t mask;
1144 const int step = 1 << subsampling_factor;
1145
1146 for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
1147 mask_8x8_1 | mask_4x4_1;
1148 mask; mask >>= step) {
1149 const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
1150 const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
1151
1152 if (mask & 1) {
1153 if ((mask_16x16_0 | mask_16x16_1) & 1) {
1154 // The chroma plane filters fewer pixels, a change introduced in the
1155 // deblock_13tap experiment.
1156 LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;
1157
1158 if ((mask_16x16_0 & mask_16x16_1) & 1) {
1159 if (plane) {
1160 aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
1161 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1162 lfi1->hev_thr);
1163 } else {
1164 aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
1165 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1166 lfi1->hev_thr);
1167 }
1168 } else if (mask_16x16_0 & 1) {
1169 lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
1170 } else {
1171 lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
1172 lfi1->hev_thr);
1173 }
1174 }
1175
1176 if ((mask_8x8_0 | mask_8x8_1) & 1) {
1177 // The chroma plane filters fewer pixels, a change introduced in the
1178 // deblock_13tap experiment.
1179 LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;
1180
1181 if ((mask_8x8_0 & mask_8x8_1) & 1) {
1182 if (plane) {
1183 aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
1184 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1185 lfi1->hev_thr);
1186 } else {
1187 aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
1188 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1189 lfi1->hev_thr);
1190 }
1191 } else if (mask_8x8_0 & 1) {
1192 lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
1193 } else {
1194 lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
1195 lfi1->hev_thr);
1196 }
1197 }
1198
1199 if ((mask_4x4_0 | mask_4x4_1) & 1) {
1200 if ((mask_4x4_0 & mask_4x4_1) & 1) {
1201 aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
1202 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1203 lfi1->hev_thr);
1204 } else if (mask_4x4_0 & 1) {
1205 aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
1206 } else {
1207 aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
1208 lfi1->hev_thr);
1209 }
1210 }
1211 }
1212
1213 s += 4;
1214 lfl += step;
1215 lfl2 += step;
1216 mask_16x16_0 >>= step;
1217 mask_8x8_0 >>= step;
1218 mask_4x4_0 >>= step;
1219 mask_16x16_1 >>= step;
1220 mask_8x8_1 >>= step;
1221 mask_4x4_1 >>= step;
1222 }
1223 }
1224
1225 static void highbd_filter_selectively_vert_row2(
1226 int subsampling_factor, uint16_t *s, int pitch, int plane,
1227 uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
1228 uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
1229 const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
1230 uint64_t mask;
1231 const int step = 1 << subsampling_factor;
1232
1233 for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
1234 mask_8x8_1 | mask_4x4_1;
1235 mask; mask >>= step) {
1236 const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
1237 const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
1238
1239 if (mask & 1) {
1240 if ((mask_16x16_0 | mask_16x16_1) & 1) {
1241 // The chroma plane filters fewer pixels, a change introduced in the
1242 // deblock_13tap experiment.
1243 HbdLpfFunc highbd_lpf_vertical =
1244 plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;
1245
1246 if ((mask_16x16_0 & mask_16x16_1) & 1) {
1247 if (plane) {
1248 aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
1249 lfi0->hev_thr, lfi1->mblim,
1250 lfi1->lim, lfi1->hev_thr, bd);
1251 } else {
1252 aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
1253 lfi0->hev_thr, lfi1->mblim,
1254 lfi1->lim, lfi1->hev_thr, bd);
1255 }
1256 } else if (mask_16x16_0 & 1) {
1257 highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1258 bd);
1259 } else {
1260 highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
1261 lfi1->hev_thr, bd);
1262 }
1263 }
1264
1265 if ((mask_8x8_0 | mask_8x8_1) & 1) {
1266 HbdLpfFunc highbd_lpf_vertical =
1267 plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;
1268
1269 if ((mask_8x8_0 & mask_8x8_1) & 1) {
1270 if (plane) {
1271 aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
1272 lfi0->hev_thr, lfi1->mblim,
1273 lfi1->lim, lfi1->hev_thr, bd);
1274 } else {
1275 aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
1276 lfi0->hev_thr, lfi1->mblim,
1277 lfi1->lim, lfi1->hev_thr, bd);
1278 }
1279 } else if (mask_8x8_0 & 1) {
1280 highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
1281 bd);
1282 } else {
1283 highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
1284 lfi1->hev_thr, bd);
1285 }
1286 }
1287
1288 if ((mask_4x4_0 | mask_4x4_1) & 1) {
1289 if ((mask_4x4_0 & mask_4x4_1) & 1) {
1290 aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
1291 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1292 lfi1->hev_thr, bd);
1293 } else if (mask_4x4_0 & 1) {
1294 aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
1295 lfi0->hev_thr, bd);
1296 } else {
1297 aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim,
1298 lfi1->lim, lfi1->hev_thr, bd);
1299 }
1300 }
1301 }
1302
1303 s += 4;
1304 lfl += step;
1305 lfl2 += step;
1306 mask_16x16_0 >>= step;
1307 mask_8x8_0 >>= step;
1308 mask_4x4_0 >>= step;
1309 mask_16x16_1 >>= step;
1310 mask_8x8_1 >>= step;
1311 mask_4x4_1 >>= step;
1312 }
1313 }
1314
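// Filters horizontal edges along one row of 4x4 columns. two_block_mask
// tests whether the next column of this plane needs the same filter so a
// single *_dual call can cover both edges: without subsampling the pattern
// is binary 11 (3); with subsampling it is binary 101 (5), because only
// every other mask bit belongs to this plane.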
1315 static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
1316 int subsampling, uint64_t mask_16x16,
1317 uint64_t mask_8x8, uint64_t mask_4x4,
1318 const loop_filter_info_n *lfi_n,
1319 const uint8_t *lfl) {
1320 uint64_t mask;
1321 int count;
1322 const int step = 1 << subsampling;
1323 const unsigned int two_block_mask = subsampling ? 5 : 3;
1324 int offset = 0;
1325
1326 for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
1327 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
1328 // Next block's thresholds, used when it lies within the current 64x64
1329 // block. If it is out of bounds, its mask is zero and lfin points to the
1330 // current edge's filter parameters instead of the next edge's.
1331 int next_edge = step;
1332 if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
1333 const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);
1334
1335 count = 1;
1336 if (mask & 1) {
1337 if (mask_16x16 & 1) {
1338 // The chroma plane filters fewer pixels (a change introduced in the
1339 // deblock_13tap experiment).
1340 LpfFunc lpf_horizontal =
1341 plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;
1342
1343 if ((mask_16x16 & two_block_mask) == two_block_mask) {
1344 if (plane) {
1345 aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
1346 lfi->hev_thr, lfin->mblim, lfin->lim,
1347 lfin->hev_thr);
1348 } else {
1349 aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
1350 lfi->hev_thr, lfin->mblim, lfin->lim,
1351 lfin->hev_thr);
1352 }
1353 count = 2;
1354 } else {
1355 lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
1356 }
1357 } else if (mask_8x8 & 1) {
1358 // The chroma plane filters fewer pixels (a change introduced in the
1359 // deblock_13tap experiment).
1360 LpfFunc lpf_horizontal =
1361 plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;
1362
1363 if ((mask_8x8 & two_block_mask) == two_block_mask) {
1364 if (plane) {
1365 aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
1366 lfi->hev_thr, lfin->mblim, lfin->lim,
1367 lfin->hev_thr);
1368 } else {
1369 aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
1370 lfi->hev_thr, lfin->mblim, lfin->lim,
1371 lfin->hev_thr);
1372 }
1373 count = 2;
1374 } else {
1375 lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
1376 }
1377 } else if (mask_4x4 & 1) {
1378 if ((mask_4x4 & two_block_mask) == two_block_mask) {
1379 aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
1380 lfi->hev_thr, lfin->mblim, lfin->lim,
1381 lfin->hev_thr);
1382 count = 2;
1383 } else {
1384 aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
1385 }
1386 }
1387 }
1388
1389 s += 4 * count;
1390 lfl += step * count;
1391 mask_16x16 >>= step * count;
1392 mask_8x8 >>= step * count;
1393 mask_4x4 >>= step * count;
1394 offset += step * count;
1395 }
1396 }
1397
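// High-bit-depth counterpart of filter_selectively_horiz above: identical
// control flow, but it operates on uint16_t pixels and forwards the bit
// depth to the aom_highbd_* filter kernels.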
1398 static void highbd_filter_selectively_horiz(
1399 uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
1400 uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
1401 uint8_t *lfl, int bd) {
1402 uint64_t mask;
1403 int count;
1404 const int step = 1 << subsampling;
1405 const unsigned int two_block_mask = subsampling ? 5 : 3;
1406 int offset = 0;
1407
1408 for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
1409 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
1410 // Next block's thresholds, used when it lies within the current 64x64
1411 // block. If it is out of bounds, its mask is zero and lfin points to the
1412 // current edge's filter parameters instead of the next edge's.
1413 int next_edge = step;
1414 if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
1415 const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);
1416
1417 count = 1;
1418 if (mask & 1) {
1419 if (mask_16x16 & 1) {
1420 HbdLpfFunc highbd_lpf_horizontal =
1421 plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;
1422
1423 if ((mask_16x16 & two_block_mask) == two_block_mask) {
1424 if (plane) {
1425 aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
1426 lfi->hev_thr, lfin->mblim,
1427 lfin->lim, lfin->hev_thr, bd);
1428 } else {
1429 aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
1430 lfi->hev_thr, lfin->mblim,
1431 lfin->lim, lfin->hev_thr, bd);
1432 }
1433 count = 2;
1434 } else {
1435 highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1436 bd);
1437 }
1438 } else if (mask_8x8 & 1) {
1439 HbdLpfFunc highbd_lpf_horizontal =
1440 plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;
1441
1442 if ((mask_8x8 & two_block_mask) == two_block_mask) {
1443 if (plane) {
1444 aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
1445 lfi->hev_thr, lfin->mblim,
1446 lfin->lim, lfin->hev_thr, bd);
1447 } else {
1448 aom_highbd_lpf_horizontal_8_dual_c(s, pitch, lfi->mblim, lfi->lim,
1449 lfi->hev_thr, lfin->mblim,
1450 lfin->lim, lfin->hev_thr, bd);
1451 }
1452 count = 2;
1453 } else {
1454 highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1455 bd);
1456 }
1457 } else if (mask_4x4 & 1) {
1458 if ((mask_4x4 & two_block_mask) == two_block_mask) {
1459 aom_highbd_lpf_horizontal_4_dual_c(s, pitch, lfi->mblim, lfi->lim,
1460 lfi->hev_thr, lfin->mblim,
1461 lfin->lim, lfin->hev_thr, bd);
1462 count = 2;
1463 } else {
1464 aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
1465 lfi->hev_thr, bd);
1466 }
1467 }
1468 }
1469
1470 s += 4 * count;
1471 lfl += step * count;
1472 mask_16x16 >>= step * count;
1473 mask_8x8 >>= step * count;
1474 mask_4x4 >>= step * count;
1475 offset += step * count;
1476 }
1477 }
1478
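// Builds the per-64x64 left-edge (vertical filtering) bitmasks for one plane.
// For every 4x4 position the transform size, skip flag, coding block border
// and filter level are read from the LoopFilterMask, and a bit is set in
// left_y/left_u/left_v at the minimum of the current and previous transform
// sizes (capped at TX_16X16). When the current level is 0 but the previous
// one is not, the previous level is copied so the edge is still filtered.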
1479 void av1_build_bitmask_vert_info(
1480 AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
1481 int plane) {
1482 const int subsampling_x = plane_ptr->subsampling_x;
1483 const int subsampling_y = plane_ptr->subsampling_y;
1484 const int row_step = (MI_SIZE >> MI_SIZE_LOG2);
1485 const int is_uv = plane > 0;
1486 TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
1487 uint8_t level, prev_level = 1;
1488 uint64_t skip, prev_skip = 0;
1489 uint64_t is_coding_block_border;
1490
1491 for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r += row_step) {
1492 const int mi_row = r << subsampling_y;
1493 const int row = mi_row % MI_SIZE_64X64;
1494 const int row_uv = row | subsampling_y;
1495 int index = 0;
1496 const int shift = get_index_shift(0, row, &index);
1497
1498 for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width;
1499 c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) {
1500 const int mi_col = c << subsampling_x;
1501 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
1502
1503 for (int col_in_unit = 0;
1504 col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) {
1505 const int x = (c + col_in_unit) << MI_SIZE_LOG2;
1506 if (x >= plane_ptr->dst.width) break;
1507 const int col = col_in_unit << subsampling_x;
1508 const int col_uv = col | subsampling_x;
1509 const uint64_t mask = ((uint64_t)1 << (shift | col));
1510 skip = lfm->skip.bits[index] & mask;
1511 is_coding_block_border = lfm->is_vert_border.bits[index] & mask;
1512 switch (plane) {
1513 case 0: level = lfm->lfl_y_ver[row_uv][col_uv]; break;
1514 case 1: level = lfm->lfl_u_ver[row_uv][col_uv]; break;
1515 case 2: level = lfm->lfl_v_ver[row_uv][col_uv]; break;
1516 default: assert(plane >= 0 && plane <= 2); return;
1517 }
1518 for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
1519 if (is_uv && ts == TX_64X64) continue;
1520 if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) {
1521 tx_size = ts;
1522 break;
1523 }
1524 }
1525 if ((c + col_in_unit > 0) && (level || prev_level) &&
1526 (!prev_skip || !skip || is_coding_block_border)) {
1527 const TX_SIZE min_tx_size =
1528 AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
1529 const int shift_1 = get_index_shift(col_uv, row_uv, &index);
1530 const uint64_t mask_1 = ((uint64_t)1 << shift_1);
1531 switch (plane) {
1532 case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break;
1533 case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break;
1534 case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break;
1535 default: assert(plane >= 0 && plane <= 2); return;
1536 }
1537 if (level == 0 && prev_level != 0) {
1538 switch (plane) {
1539 case 0: lfm->lfl_y_ver[row_uv][col_uv] = prev_level; break;
1540 case 1: lfm->lfl_u_ver[row_uv][col_uv] = prev_level; break;
1541 case 2: lfm->lfl_v_ver[row_uv][col_uv] = prev_level; break;
1542 default: assert(plane >= 0 && plane <= 2); return;
1543 }
1544 }
1545 }
1546
1547 // update prev info
1548 prev_level = level;
1549 prev_skip = skip;
1550 prev_tx_size = tx_size;
1551 // advance
1552 col_in_unit += tx_size_wide_unit[tx_size];
1553 }
1554 }
1555 }
1556 }
1557
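// Same as av1_build_bitmask_vert_info, but for top edges (horizontal
// filtering): it walks columns of 4x4 rows and fills above_y/above_u/above_v
// and the *_hor filter levels.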
1558 void av1_build_bitmask_horz_info(
1559 AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
1560 int plane) {
1561 const int subsampling_x = plane_ptr->subsampling_x;
1562 const int subsampling_y = plane_ptr->subsampling_y;
1563 const int col_step = (MI_SIZE >> MI_SIZE_LOG2);
1564 const int is_uv = plane > 0;
1565 TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
1566 uint8_t level, prev_level = 1;
1567 uint64_t skip, prev_skip = 0;
1568 uint64_t is_coding_block_border;
1569
1570 for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c += col_step) {
1571 const int mi_col = c << subsampling_x;
1572 const int col = mi_col % MI_SIZE_64X64;
1573 const int col_uv = col | subsampling_x;
1574
1575 for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height;
1576 r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) {
1577 const int mi_row = r << subsampling_y;
1578 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
1579
1580 for (int r_in_unit = 0;
1581 r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) {
1582 const int y = (r + r_in_unit) << MI_SIZE_LOG2;
1583 if (y >= plane_ptr->dst.height) break;
1584 const int row = r_in_unit << subsampling_y;
1585 const int row_uv = row | subsampling_y;
1586 int index = 0;
1587 const int shift = get_index_shift(col, row, &index);
1588 const uint64_t mask = ((uint64_t)1 << shift);
1589 skip = lfm->skip.bits[index] & mask;
1590 is_coding_block_border = lfm->is_horz_border.bits[index] & mask;
1591 switch (plane) {
1592 case 0: level = lfm->lfl_y_hor[row_uv][col_uv]; break;
1593 case 1: level = lfm->lfl_u_hor[row_uv][col_uv]; break;
1594 case 2: level = lfm->lfl_v_hor[row_uv][col_uv]; break;
1595 default: assert(plane >= 0 && plane <= 2); return;
1596 }
1597 for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
1598 if (is_uv && ts == TX_64X64) continue;
1599 if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) {
1600 tx_size = ts;
1601 break;
1602 }
1603 }
1604 if ((r + r_in_unit > 0) && (level || prev_level) &&
1605 (!prev_skip || !skip || is_coding_block_border)) {
1606 const TX_SIZE min_tx_size =
1607 AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
1608 const int shift_1 = get_index_shift(col_uv, row_uv, &index);
1609 const uint64_t mask_1 = ((uint64_t)1 << shift_1);
1610
1611 switch (plane) {
1612 case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break;
1613 case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break;
1614 case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break;
1615 default: assert(plane >= 0 && plane <= 2); return;
1616 }
1617 if (level == 0 && prev_level != 0) {
1618 switch (plane) {
1619 case 0: lfm->lfl_y_hor[row_uv][col_uv] = prev_level; break;
1620 case 1: lfm->lfl_u_hor[row_uv][col_uv] = prev_level; break;
1621 case 2: lfm->lfl_v_hor[row_uv][col_uv] = prev_level; break;
1622 default: assert(plane >= 0 && plane <= 2); return;
1623 }
1624 }
1625 }
1626
1627 // update prev info
1628 prev_level = level;
1629 prev_skip = skip;
1630 prev_tx_size = tx_size;
1631 // advance
1632 r_in_unit += tx_size_high_unit[tx_size];
1633 }
1634 }
1635 }
1636 }
1637
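// Applies vertical deblocking to one 64x64 unit using the precomputed
// bitmasks. Two rows of 4x4 edges are filtered per iteration; mask_cutoff
// keeps only the 16 columns of the current row pair, and the second row's
// masks are cleared when that row falls outside the frame.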
1638 void av1_filter_block_plane_bitmask_vert(
1639 AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
1640 int mi_row, int mi_col) {
1641 struct buf_2d *const dst = &plane_ptr->dst;
1642 uint8_t *const buf0 = dst->buf;
1643 const int ssx = plane_ptr->subsampling_x;
1644 const int ssy = plane_ptr->subsampling_y;
1645 const int mask_cutoff = 0xffff;
1646 const int row_step = 1 << ssy;
1647 const int two_row_step = 2 << ssy;
1648 const int row_stride = dst->stride << MI_SIZE_LOG2;
1649 const int two_row_stride = row_stride << 1;
1650 uint64_t mask_16x16 = 0;
1651 uint64_t mask_8x8 = 0;
1652 uint64_t mask_4x4 = 0;
1653 uint8_t *lfl;
1654 uint8_t *lfl2;
1655 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
1656 assert(lfm);
1657
1658 // 1. vertical filtering. filter two rows at a time
1659 for (int r = 0;
1660 ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
1661 r += two_row_step) {
1662 const int row = r | ssy;
1663 const int row_next = row + row_step;
1664 const int col = ssx;
1665 int index = 0;
1666 const int shift = get_index_shift(col, row, &index);
1667 int index_next = 0;
1668 const int shift_next = get_index_shift(col, row_next, &index_next);
1669 const int has_next_row = row_next < cm->mi_rows;
1670 switch (pl) {
1671 case 0:
1672 mask_16x16 = lfm->left_y[TX_16X16].bits[index];
1673 mask_8x8 = lfm->left_y[TX_8X8].bits[index];
1674 mask_4x4 = lfm->left_y[TX_4X4].bits[index];
1675 lfl = &lfm->lfl_y_ver[row][col];
1676 lfl2 = &lfm->lfl_y_ver[row_next][col];
1677 break;
1678 case 1:
1679 mask_16x16 = lfm->left_u[TX_16X16].bits[index];
1680 mask_8x8 = lfm->left_u[TX_8X8].bits[index];
1681 mask_4x4 = lfm->left_u[TX_4X4].bits[index];
1682 lfl = &lfm->lfl_u_ver[row][col];
1683 lfl2 = &lfm->lfl_u_ver[row_next][col];
1684 break;
1685 case 2:
1686 mask_16x16 = lfm->left_v[TX_16X16].bits[index];
1687 mask_8x8 = lfm->left_v[TX_8X8].bits[index];
1688 mask_4x4 = lfm->left_v[TX_4X4].bits[index];
1689 lfl = &lfm->lfl_v_ver[row][col];
1690 lfl2 = &lfm->lfl_v_ver[row_next][col];
1691 break;
1692 default: assert(pl >= 0 && pl <= 2); return;
1693 }
1694 uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
1695 uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
1696 uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
1697 uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
1698 uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
1699 uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
1700 if (!has_next_row) {
1701 mask_16x16_1 = 0;
1702 mask_8x8_1 = 0;
1703 mask_4x4_1 = 0;
1704 }
1705
1706 if (cm->seq_params.use_highbitdepth)
1707 highbd_filter_selectively_vert_row2(
1708 ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
1709 mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
1710 &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
1711 else
1712 filter_selectively_vert_row2(
1713 ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
1714 mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
1715 dst->buf += two_row_stride;
1716 }
1717 // reset buf pointer for horizontal filtering
1718 dst->buf = buf0;
1719 }
1720
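// Horizontal counterpart of av1_filter_block_plane_bitmask_vert: one row of
// 4x4 edges per iteration, skipping the very first row of the frame because
// there is no block above it to filter against.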
1721 void av1_filter_block_plane_bitmask_horz(
1722 AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
1723 int mi_row, int mi_col) {
1724 struct buf_2d *const dst = &plane_ptr->dst;
1725 uint8_t *const buf0 = dst->buf;
1726 const int ssx = plane_ptr->subsampling_x;
1727 const int ssy = plane_ptr->subsampling_y;
1728 const int mask_cutoff = 0xffff;
1729 const int row_step = 1 << ssy;
1730 const int row_stride = dst->stride << MI_SIZE_LOG2;
1731 uint64_t mask_16x16 = 0;
1732 uint64_t mask_8x8 = 0;
1733 uint64_t mask_4x4 = 0;
1734 uint8_t *lfl;
1735 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
1736 assert(lfm);
1737 for (int r = 0;
1738 ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
1739 r += row_step) {
1740 if (mi_row + r == 0) {
1741 dst->buf += row_stride;
1742 continue;
1743 }
1744 const int row = r | ssy;
1745 const int col = ssx;
1746 int index = 0;
1747 const int shift = get_index_shift(col, row, &index);
1748 switch (pl) {
1749 case 0:
1750 mask_16x16 = lfm->above_y[TX_16X16].bits[index];
1751 mask_8x8 = lfm->above_y[TX_8X8].bits[index];
1752 mask_4x4 = lfm->above_y[TX_4X4].bits[index];
1753 lfl = &lfm->lfl_y_hor[row][col];
1754 break;
1755 case 1:
1756 mask_16x16 = lfm->above_u[TX_16X16].bits[index];
1757 mask_8x8 = lfm->above_u[TX_8X8].bits[index];
1758 mask_4x4 = lfm->above_u[TX_4X4].bits[index];
1759 lfl = &lfm->lfl_u_hor[row][col];
1760 break;
1761 case 2:
1762 mask_16x16 = lfm->above_v[TX_16X16].bits[index];
1763 mask_8x8 = lfm->above_v[TX_8X8].bits[index];
1764 mask_4x4 = lfm->above_v[TX_4X4].bits[index];
1765 lfl = &lfm->lfl_v_hor[row][col];
1766 break;
1767 default: assert(pl >= 0 && pl <= 2); return;
1768 }
1769 mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
1770 mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
1771 mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
1772
1773 if (cm->seq_params.use_highbitdepth)
1774 highbd_filter_selectively_horiz(
1775 CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
1776 mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->seq_params.bit_depth);
1777 else
1778 filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
1779 mask_8x8, mask_4x4, &cm->lf_info, lfl);
1780 dst->buf += row_stride;
1781 }
1782 // reset buf pointer for next block
1783 dst->buf = buf0;
1784 }
1785
1786 void av1_filter_block_plane_ver(AV1_COMMON *const cm,
1787 struct macroblockd_plane *const plane_ptr,
1788 int pl, int mi_row, int mi_col) {
1789 struct buf_2d *const dst = &plane_ptr->dst;
1790 int r, c;
1791 const int ssx = plane_ptr->subsampling_x;
1792 const int ssy = plane_ptr->subsampling_y;
1793 const int mask_cutoff = 0xffff;
1794 const int single_step = 1 << ssy;
1795 const int r_step = 2 << ssy;
1796 uint64_t mask_16x16 = 0;
1797 uint64_t mask_8x8 = 0;
1798 uint64_t mask_4x4 = 0;
1799 uint8_t *lfl;
1800 uint8_t *lfl2;
1801
1802 // filter two rows at a time
1803 for (r = 0; r < cm->seq_params.mib_size &&
1804 ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
1805 r += r_step) {
1806 for (c = 0; c < cm->seq_params.mib_size &&
1807 ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
1808 c += MI_SIZE_64X64) {
1809 dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
1810 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
1811 assert(lfm);
1812 const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
1813 const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
1814 int index = 0;
1815 const int shift = get_index_shift(col, row, &index);
1816 // The current and next rows should belong to the same mask_idx and index.
1817 // Compute the next row's shift.
1818 const int row_next = row + single_step;
1819 int index_next = 0;
1820 const int shift_next = get_index_shift(col, row_next, &index_next);
1821 switch (pl) {
1822 case 0:
1823 mask_16x16 = lfm->left_y[TX_16X16].bits[index];
1824 mask_8x8 = lfm->left_y[TX_8X8].bits[index];
1825 mask_4x4 = lfm->left_y[TX_4X4].bits[index];
1826 lfl = &lfm->lfl_y_ver[row][col];
1827 lfl2 = &lfm->lfl_y_ver[row_next][col];
1828 break;
1829 case 1:
1830 mask_16x16 = lfm->left_u[TX_16X16].bits[index];
1831 mask_8x8 = lfm->left_u[TX_8X8].bits[index];
1832 mask_4x4 = lfm->left_u[TX_4X4].bits[index];
1833 lfl = &lfm->lfl_u_ver[row][col];
1834 lfl2 = &lfm->lfl_u_ver[row_next][col];
1835 break;
1836 case 2:
1837 mask_16x16 = lfm->left_v[TX_16X16].bits[index];
1838 mask_8x8 = lfm->left_v[TX_8X8].bits[index];
1839 mask_4x4 = lfm->left_v[TX_4X4].bits[index];
1840 lfl = &lfm->lfl_v_ver[row][col];
1841 lfl2 = &lfm->lfl_v_ver[row_next][col];
1842 break;
1843 default: assert(pl >= 0 && pl <= 2); return;
1844 }
1845 uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
1846 uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
1847 uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
1848 uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
1849 uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
1850 uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
1851
1852 if (cm->seq_params.use_highbitdepth)
1853 highbd_filter_selectively_vert_row2(
1854 ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
1855 mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
1856 &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
1857 else
1858 filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
1859 mask_16x16_0, mask_8x8_0, mask_4x4_0,
1860 mask_16x16_1, mask_8x8_1, mask_4x4_1,
1861 &cm->lf_info, lfl, lfl2);
1862 dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
1863 }
1864 dst->buf += 2 * MI_SIZE * dst->stride;
1865 }
1866 }
1867
1868 void av1_filter_block_plane_hor(AV1_COMMON *const cm,
1869 struct macroblockd_plane *const plane_ptr,
1870 int pl, int mi_row, int mi_col) {
1871 struct buf_2d *const dst = &plane_ptr->dst;
1872 int r, c;
1873 const int ssx = plane_ptr->subsampling_x;
1874 const int ssy = plane_ptr->subsampling_y;
1875 const int mask_cutoff = 0xffff;
1876 const int r_step = 1 << ssy;
1877 uint64_t mask_16x16 = 0;
1878 uint64_t mask_8x8 = 0;
1879 uint64_t mask_4x4 = 0;
1880 uint8_t *lfl;
1881
1882 for (r = 0; r < cm->seq_params.mib_size &&
1883 ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
1884 r += r_step) {
1885 for (c = 0; c < cm->seq_params.mib_size &&
1886 ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
1887 c += MI_SIZE_64X64) {
1888 if (mi_row + r == 0) continue;
1889
1890 dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
1891 LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
1892 assert(lfm);
1893 const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
1894 const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
1895 int index = 0;
1896 const int shift = get_index_shift(col, row, &index);
1897 switch (pl) {
1898 case 0:
1899 mask_16x16 = lfm->above_y[TX_16X16].bits[index];
1900 mask_8x8 = lfm->above_y[TX_8X8].bits[index];
1901 mask_4x4 = lfm->above_y[TX_4X4].bits[index];
1902 lfl = &lfm->lfl_y_hor[row][col];
1903 break;
1904 case 1:
1905 mask_16x16 = lfm->above_u[TX_16X16].bits[index];
1906 mask_8x8 = lfm->above_u[TX_8X8].bits[index];
1907 mask_4x4 = lfm->above_u[TX_4X4].bits[index];
1908 lfl = &lfm->lfl_u_hor[row][col];
1909 break;
1910 case 2:
1911 mask_16x16 = lfm->above_v[TX_16X16].bits[index];
1912 mask_8x8 = lfm->above_v[TX_8X8].bits[index];
1913 mask_4x4 = lfm->above_v[TX_4X4].bits[index];
1914 lfl = &lfm->lfl_v_hor[row][col];
1915 break;
1916 default: assert(pl >= 0 && pl <= 2); return;
1917 }
1918 mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
1919 mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
1920 mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
1921
1922 if (cm->seq_params.use_highbitdepth)
1923 highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
1924 dst->stride, pl, ssx, mask_16x16,
1925 mask_8x8, mask_4x4, &cm->lf_info, lfl,
1926 (int)cm->seq_params.bit_depth);
1927 else
1928 filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
1929 mask_8x8, mask_4x4, &cm->lf_info, lfl);
1930 dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
1931 }
1932 dst->buf += MI_SIZE * dst->stride;
1933 }
1934 }
1935 #endif // LOOP_FILTER_BITMASK
1936
1937 static TX_SIZE get_transform_size(const MACROBLOCKD *const xd,
1938 const MB_MODE_INFO *const mbmi,
1939 const EDGE_DIR edge_dir, const int mi_row,
1940 const int mi_col, const int plane,
1941 const struct macroblockd_plane *plane_ptr) {
1942 assert(mbmi != NULL);
1943 if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
1944
1945 TX_SIZE tx_size =
1946 (plane == AOM_PLANE_Y)
1947 ? mbmi->tx_size
1948 : av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x,
1949 plane_ptr->subsampling_y);
1950 assert(tx_size < TX_SIZES_ALL);
1951 if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip) {
1952 const BLOCK_SIZE sb_type = mbmi->sb_type;
1953 const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
1954 const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
1955 const TX_SIZE mb_tx_size =
1956 mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
1957 assert(mb_tx_size < TX_SIZES_ALL);
1958 tx_size = mb_tx_size;
1959 }
1960
1961 // For chroma or non-square transforms, the transform size must be
1962 // converted into the transform size along the filtering direction:
1963 // for a vertical edge the filter runs horizontally, and for a horizontal
1964 // edge it runs vertically.
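// For example, a TX_8X16 block filtered across a vertical edge maps to
// txsize_horz_map[TX_8X16] == TX_8X8, since only the horizontal extent of
// the transform matters for a vertical edge.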
1965 tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
1966 : txsize_vert_map[tx_size];
1967 return tx_size;
1968 }
1969
1970 typedef struct AV1_DEBLOCKING_PARAMETERS {
1971 // length of the filter applied to the outer edge
1972 uint32_t filter_length;
1973 // deblocking limits
1974 const uint8_t *lim;
1975 const uint8_t *mblim;
1976 const uint8_t *hev_thr;
1977 } AV1_DEBLOCKING_PARAMETERS;
1978
1979 // Returns the TX_SIZE from get_transform_size(), so it is plane- and
1980 // direction-aware.
1981 static TX_SIZE set_lpf_parameters(
1982 AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
1983 const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
1984 const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
1985 const int plane, const struct macroblockd_plane *const plane_ptr) {
1986 // reset to initial values
1987 params->filter_length = 0;
1988
1989 // no deblocking is required
1990 const uint32_t width = plane_ptr->dst.width;
1991 const uint32_t height = plane_ptr->dst.height;
1992 if ((width <= x) || (height <= y)) {
1993 // just return the smallest transform unit size
1994 return TX_4X4;
1995 }
1996
1997 const uint32_t scale_horz = plane_ptr->subsampling_x;
1998 const uint32_t scale_vert = plane_ptr->subsampling_y;
1999 // For a sub-8x8 block, the chroma prediction mode is obtained from the
2000 // bottom/right mi structure of the co-located 8x8 luma block. So for the
2001 // chroma plane, mi_row and mi_col should map to the bottom/right mi
2002 // structure, i.e., both mi_row and mi_col should be odd.
2003 const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
2004 const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
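// For example, with 4:2:0 chroma (scale_horz == 1), chroma column x == 8 maps
// to luma column 16, i.e. mi column 4, and the OR with 1 selects mi_col == 5,
// the right-hand mi of the co-located 8x8 luma block.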
2005 MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
2006 const MB_MODE_INFO *mbmi = mi[0];
2007 // If the current mbmi is not correctly set up, return an invalid value to
2008 // stop filtering. For example, if this tile is not coded, its mbmi is not
2009 // set up.
2010 if (mbmi == NULL) return TX_INVALID;
2011
2012 const TX_SIZE ts =
2013 get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr);
2014
2015 {
2016 const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
2017 const uint32_t transform_masks =
2018 edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
2019 const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);
2020
2021 if (!tu_edge) return ts;
2022
2023 // prepare outer edge parameters. deblock the edge if it's an edge of a TU
2024 {
2025 const uint32_t curr_level =
2026 get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
2027 const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
2028 uint32_t level = curr_level;
2029 if (coord) {
2030 {
2031 const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
2032 if (mi_prev == NULL) return TX_INVALID;
2033 const int pv_row =
2034 (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
2035 const int pv_col =
2036 (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
2037 const TX_SIZE pv_ts = get_transform_size(
2038 xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr);
2039
2040 const uint32_t pv_lvl =
2041 get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
2042
2043 const int pv_skip = mi_prev->skip && is_inter_block(mi_prev);
2044 const BLOCK_SIZE bsize =
2045 get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x,
2046 plane_ptr->subsampling_y);
2047 const int prediction_masks = edge_dir == VERT_EDGE
2048 ? block_size_wide[bsize] - 1
2049 : block_size_high[bsize] - 1;
2050 const int32_t pu_edge = !(coord & prediction_masks);
2051 // If both the current and the previous blocks are skipped, only deblock
2052 // the edge if it is also a prediction unit (PU) edge.
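// The filter length below is chosen from the smaller of the two transform
// sizes: e.g. a luma edge between a 16x16 and an 8x8 transform gives
// min_ts == TX_8X8 and the 8-tap filter, while the same edge on a chroma
// plane uses the 6-tap filter.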
2053 if ((curr_level || pv_lvl) &&
2054 (!pv_skip || !curr_skipped || pu_edge)) {
2055 const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
2056 if (TX_4X4 >= min_ts) {
2057 params->filter_length = 4;
2058 } else if (TX_8X8 == min_ts) {
2059 if (plane != 0)
2060 params->filter_length = 6;
2061 else
2062 params->filter_length = 8;
2063 } else {
2064 params->filter_length = 14;
2065 // No wide filtering for chroma plane
2066 if (plane != 0) {
2067 params->filter_length = 6;
2068 }
2069 }
2070
2071 // Take the previous block's filter level when the current block's level is
2072 // 0 (e.g. the current block is skipped but the previous one is not).
2073 level = (curr_level) ? (curr_level) : (pv_lvl);
2074 }
2075 }
2076 }
2077 // prepare common parameters
2078 if (params->filter_length) {
2079 const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
2080 params->lim = limits->lim;
2081 params->mblim = limits->mblim;
2082 params->hev_thr = limits->hev_thr;
2083 }
2084 }
2085 }
2086
2087 return ts;
2088 }
2089
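// Non-bitmask vertical deblocking path: for every 4x4 column inside the
// superblock, set_lpf_parameters() derives the filter length from the
// transform sizes, filter levels and skip flags on both sides of the edge,
// and the matching 4/6/8/14-tap kernel is applied. The column pointer then
// advances by the width of the transform unit that was just crossed.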
2090 void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
2091 const MACROBLOCKD *const xd, const int plane,
2092 const MACROBLOCKD_PLANE *const plane_ptr,
2093 const uint32_t mi_row, const uint32_t mi_col) {
2094 const int row_step = MI_SIZE >> MI_SIZE_LOG2;
2095 const uint32_t scale_horz = plane_ptr->subsampling_x;
2096 const uint32_t scale_vert = plane_ptr->subsampling_y;
2097 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2098 const int dst_stride = plane_ptr->dst.stride;
2099 const int y_range = (MAX_MIB_SIZE >> scale_vert);
2100 const int x_range = (MAX_MIB_SIZE >> scale_horz);
2101 const int use_highbitdepth = cm->seq_params.use_highbitdepth;
2102 const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
2103 for (int y = 0; y < y_range; y += row_step) {
2104 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
2105 for (int x = 0; x < x_range;) {
2106 // The inner loop always filters vertical edges in a MI block. If the MI
2107 // size is 8x8, it will filter the vertical edge aligned with an 8x8 block.
2108 // If a 4x4 transform is used, it will then filter the internal edge
2109 // aligned with a 4x4 block.
2110 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2111 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2112 uint32_t advance_units;
2113 TX_SIZE tx_size;
2114 AV1_DEBLOCKING_PARAMETERS params;
2115 memset(&params, 0, sizeof(params));
2116
2117 tx_size =
2118 set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd,
2119 VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
2120 if (tx_size == TX_INVALID) {
2121 params.filter_length = 0;
2122 tx_size = TX_4X4;
2123 }
2124
2125 switch (params.filter_length) {
2126 // apply 4-tap filtering
2127 case 4:
2128 if (use_highbitdepth)
2129 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
2130 params.mblim, params.lim, params.hev_thr,
2131 bit_depth);
2132 else
2133 aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
2134 params.hev_thr);
2135 break;
2136 case 6: // apply 6-tap filter for chroma plane only
2137 assert(plane != 0);
2138 if (use_highbitdepth)
2139 aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
2140 params.mblim, params.lim, params.hev_thr,
2141 bit_depth);
2142 else
2143 aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
2144 params.hev_thr);
2145 break;
2146 // apply 8-tap filtering
2147 case 8:
2148 if (use_highbitdepth)
2149 aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
2150 params.mblim, params.lim, params.hev_thr,
2151 bit_depth);
2152 else
2153 aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
2154 params.hev_thr);
2155 break;
2156 // apply 14-tap filtering
2157 case 14:
2158 if (use_highbitdepth)
2159 aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
2160 params.mblim, params.lim, params.hev_thr,
2161 bit_depth);
2162 else
2163 aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
2164 params.hev_thr);
2165 break;
2166 // no filtering
2167 default: break;
2168 }
2169 // advance the destination pointer
2170 advance_units = tx_size_wide_unit[tx_size];
2171 x += advance_units;
2172 p += advance_units * MI_SIZE;
2173 }
2174 }
2175 }
2176
2177 void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
2178 const MACROBLOCKD *const xd, const int plane,
2179 const MACROBLOCKD_PLANE *const plane_ptr,
2180 const uint32_t mi_row, const uint32_t mi_col) {
2181 const int col_step = MI_SIZE >> MI_SIZE_LOG2;
2182 const uint32_t scale_horz = plane_ptr->subsampling_x;
2183 const uint32_t scale_vert = plane_ptr->subsampling_y;
2184 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2185 const int dst_stride = plane_ptr->dst.stride;
2186 const int y_range = (MAX_MIB_SIZE >> scale_vert);
2187 const int x_range = (MAX_MIB_SIZE >> scale_horz);
2188 const int use_highbitdepth = cm->seq_params.use_highbitdepth;
2189 const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
2190 for (int x = 0; x < x_range; x += col_step) {
2191 uint8_t *p = dst_ptr + x * MI_SIZE;
2192 for (int y = 0; y < y_range;) {
2193 // The inner loop always filters horizontal edges in a MI block. If the MI
2194 // size is 8x8, it will first filter the horizontal edge aligned with an
2195 // 8x8 block. If a 4x4 transform is used, it will then filter the internal
2196 // edge aligned with a 4x4 block.
2197 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2198 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2199 uint32_t advance_units;
2200 TX_SIZE tx_size;
2201 AV1_DEBLOCKING_PARAMETERS params;
2202 memset(&params, 0, sizeof(params));
2203
2204 tx_size =
2205 set_lpf_parameters(¶ms, (cm->mi_stride << scale_vert), cm, xd,
2206 HORZ_EDGE, curr_x, curr_y, plane, plane_ptr);
2207 if (tx_size == TX_INVALID) {
2208 params.filter_length = 0;
2209 tx_size = TX_4X4;
2210 }
2211
2212 switch (params.filter_length) {
2213 // apply 4-tap filtering
2214 case 4:
2215 if (use_highbitdepth)
2216 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
2217 params.mblim, params.lim,
2218 params.hev_thr, bit_depth);
2219 else
2220 aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
2221 params.hev_thr);
2222 break;
2223 // apply 6-tap filtering
2224 case 6:
2225 assert(plane != 0);
2226 if (use_highbitdepth)
2227 aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
2228 params.mblim, params.lim,
2229 params.hev_thr, bit_depth);
2230 else
2231 aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
2232 params.hev_thr);
2233 break;
2234 // apply 8-tap filtering
2235 case 8:
2236 if (use_highbitdepth)
2237 aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
2238 params.mblim, params.lim,
2239 params.hev_thr, bit_depth);
2240 else
2241 aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
2242 params.hev_thr);
2243 break;
2244 // apply 14-tap filtering
2245 case 14:
2246 if (use_highbitdepth)
2247 aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
2248 params.mblim, params.lim,
2249 params.hev_thr, bit_depth);
2250 else
2251 aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
2252 params.hev_thr);
2253 break;
2254 // no filtering
2255 default: break;
2256 }
2257
2258 // advance the destination pointer
2259 advance_units = tx_size_high_unit[tx_size];
2260 y += advance_units;
2261 p += advance_units * dst_stride * MI_SIZE;
2262 }
2263 }
2264 }
2265
2266 void av1_filter_block_plane_vert_test(const AV1_COMMON *const cm,
2267 const MACROBLOCKD *const xd,
2268 const int plane,
2269 const MACROBLOCKD_PLANE *const plane_ptr,
2270 const uint32_t mi_row,
2271 const uint32_t mi_col) {
2272 const int row_step = MI_SIZE >> MI_SIZE_LOG2;
2273 const uint32_t scale_horz = plane_ptr->subsampling_x;
2274 const uint32_t scale_vert = plane_ptr->subsampling_y;
2275 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2276 const int dst_stride = plane_ptr->dst.stride;
2277 const int y_range = cm->mi_rows >> scale_vert;
2278 const int x_range = cm->mi_cols >> scale_horz;
2279 for (int y = 0; y < y_range; y += row_step) {
2280 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
2281 for (int x = 0; x < x_range;) {
2282 // The inner loop always filters vertical edges in a MI block. If the MI
2283 // size is 8x8, it will filter the vertical edge aligned with an 8x8 block.
2284 // If a 4x4 transform is used, it will then filter the internal edge
2285 // aligned with a 4x4 block.
2286 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2287 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2288 uint32_t advance_units;
2289 TX_SIZE tx_size;
2290 AV1_DEBLOCKING_PARAMETERS params;
2291 memset(&params, 0, sizeof(params));
2292
2293 tx_size =
2294 set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd,
2295 VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
2296 if (tx_size == TX_INVALID) {
2297 params.filter_length = 0;
2298 tx_size = TX_4X4;
2299 }
2300
2301 // advance the destination pointer
2302 advance_units = tx_size_wide_unit[tx_size];
2303 x += advance_units;
2304 p += advance_units * MI_SIZE;
2305 }
2306 }
2307 }
2308
2309 void av1_filter_block_plane_horz_test(const AV1_COMMON *const cm,
2310 const MACROBLOCKD *const xd,
2311 const int plane,
2312 const MACROBLOCKD_PLANE *const plane_ptr,
2313 const uint32_t mi_row,
2314 const uint32_t mi_col) {
2315 const int col_step = MI_SIZE >> MI_SIZE_LOG2;
2316 const uint32_t scale_horz = plane_ptr->subsampling_x;
2317 const uint32_t scale_vert = plane_ptr->subsampling_y;
2318 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2319 const int dst_stride = plane_ptr->dst.stride;
2320 const int y_range = cm->mi_rows >> scale_vert;
2321 const int x_range = cm->mi_cols >> scale_horz;
2322 for (int x = 0; x < x_range; x += col_step) {
2323 uint8_t *p = dst_ptr + x * MI_SIZE;
2324 for (int y = 0; y < y_range;) {
2325 // The inner loop always filters horizontal edges in a MI block. If the MI
2326 // size is 8x8, it will first filter the horizontal edge aligned with an
2327 // 8x8 block. If a 4x4 transform is used, it will then filter the internal
2328 // edge aligned with a 4x4 block.
2329 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2330 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2331 uint32_t advance_units;
2332 TX_SIZE tx_size;
2333 AV1_DEBLOCKING_PARAMETERS params;
2334 memset(&params, 0, sizeof(params));
2335
2336 tx_size =
2337 set_lpf_parameters(¶ms, (cm->mi_stride << scale_vert), cm, xd,
2338 HORZ_EDGE, curr_x, curr_y, plane, plane_ptr);
2339 if (tx_size == TX_INVALID) {
2340 params.filter_length = 0;
2341 tx_size = TX_4X4;
2342 }
2343
2344 // advance the destination pointer
2345 advance_units = tx_size_high_unit[tx_size];
2346 y += advance_units;
2347 p += advance_units * dst_stride * MI_SIZE;
2348 }
2349 }
2350 }
2351
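// Filters mi rows [start, stop) for planes [plane_start, plane_end). Note
// that when both luma filter levels are zero the plane loop breaks out
// entirely, so chroma planes requested in the same call are not filtered
// either; a zero chroma level only skips that one plane.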
2352 static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
2353 MACROBLOCKD *xd, int start, int stop,
2354 #if LOOP_FILTER_BITMASK
2355 int is_decoding,
2356 #endif
2357 int plane_start, int plane_end) {
2358 struct macroblockd_plane *pd = xd->plane;
2359 const int col_start = 0;
2360 const int col_end = cm->mi_cols;
2361 int mi_row, mi_col;
2362 int plane;
2363
2364 #if LOOP_FILTER_BITMASK
2365 if (is_decoding) {
2366 cm->is_decoding = is_decoding;
2367 for (plane = plane_start; plane < plane_end; plane++) {
2368 if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
2369 break;
2370 else if (plane == 1 && !(cm->lf.filter_level_u))
2371 continue;
2372 else if (plane == 2 && !(cm->lf.filter_level_v))
2373 continue;
2374
2375 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0,
2376 plane, plane + 1);
2377
2378 av1_build_bitmask_vert_info(cm, &pd[plane], plane);
2379 av1_build_bitmask_horz_info(cm, &pd[plane], plane);
2380
2381 // apply loop filtering which only goes through buffer once
2382 for (mi_row = start; mi_row < stop; mi_row += MI_SIZE_64X64) {
2383 for (mi_col = col_start; mi_col < col_end; mi_col += MI_SIZE_64X64) {
2384 av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row, mi_col,
2385 plane, plane + 1);
2386 av1_filter_block_plane_bitmask_vert(cm, &pd[plane], plane, mi_row,
2387 mi_col);
2388 if (mi_col - MI_SIZE_64X64 >= 0) {
2389 av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row,
2390 mi_col - MI_SIZE_64X64, plane, plane + 1);
2391 av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
2392 mi_col - MI_SIZE_64X64);
2393 }
2394 }
2395 av1_setup_dst_planes(pd, BLOCK_64X64, frame_buffer, mi_row,
2396 mi_col - MI_SIZE_64X64, plane, plane + 1);
2397 av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
2398 mi_col - MI_SIZE_64X64);
2399 }
2400 }
2401 return;
2402 }
2403 #endif
2404
2405 for (plane = plane_start; plane < plane_end; plane++) {
2406 if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
2407 break;
2408 else if (plane == 1 && !(cm->lf.filter_level_u))
2409 continue;
2410 else if (plane == 2 && !(cm->lf.filter_level_v))
2411 continue;
2412
2413 if (cm->lf.combine_vert_horz_lf) {
2414 // filter all vertical and horizontal edges in every 128x128 super block
2415 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2416 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2417 // filter vertical edges
2418 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2419 mi_col, plane, plane + 1);
2420 av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
2421 mi_col);
2422 // filter horizontal edges
2423 if (mi_col - MAX_MIB_SIZE >= 0) {
2424 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
2425 mi_row, mi_col - MAX_MIB_SIZE, plane,
2426 plane + 1);
2427 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2428 mi_col - MAX_MIB_SIZE);
2429 }
2430 }
2431 // filter horizontal edges
2432 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2433 mi_col - MAX_MIB_SIZE, plane, plane + 1);
2434 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2435 mi_col - MAX_MIB_SIZE);
2436 }
2437 } else {
2438 // filter all vertical edges in every 128x128 super block
2439 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2440 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2441 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2442 mi_col, plane, plane + 1);
2443 av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
2444 mi_col);
2445 }
2446 }
2447
2448 // filter all horizontal edges in every 128x128 super block
2449 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2450 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2451 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2452 mi_col, plane, plane + 1);
2453 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2454 mi_col);
2455 }
2456 }
2457 }
2458 }
2459 }
2460
2461 void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
2462 MACROBLOCKD *xd,
2463 #if LOOP_FILTER_BITMASK
2464 int is_decoding,
2465 #endif
2466 int plane_start, int plane_end, int partial_frame) {
2467 int start_mi_row, end_mi_row, mi_rows_to_filter;
2468
2469 start_mi_row = 0;
2470 mi_rows_to_filter = cm->mi_rows;
2471 if (partial_frame && cm->mi_rows > 8) {
2472 start_mi_row = cm->mi_rows >> 1;
2473 start_mi_row &= 0xfffffff8;
2474 mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
2475 }
2476 end_mi_row = start_mi_row + mi_rows_to_filter;
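// For example, with cm->mi_rows == 270 (1080p at 4x4 mi units) the partial
// pass covers mi rows 128..160: start is the midpoint rounded down to a
// multiple of 8, and the span is AOMMAX(mi_rows / 8, 8) rows.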
2477 av1_loop_filter_frame_init(cm, plane_start, plane_end);
2478 loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row,
2479 #if LOOP_FILTER_BITMASK
2480 is_decoding,
2481 #endif
2482 plane_start, plane_end);
2483 }
2484