1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/av1_loopfilter.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24
25 // 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
26 // We use 4 uint64_t to represent the 256 bit.
27 // Each 1 represents a position where we should apply a loop filter
28 // across the left border of an 4x4 block boundary.
29 //
30 // In the case of TX_8x8-> ( in low order byte first we end up with
31 // a mask that looks like this (-- and | are used for better view)
32 //
33 // 10101010|10101010
34 // 10101010|10101010
35 // 10101010|10101010
36 // 10101010|10101010
37 // 10101010|10101010
38 // 10101010|10101010
39 // 10101010|10101010
40 // 10101010|10101010
41 // -----------------
42 // 10101010|10101010
43 // 10101010|10101010
44 // 10101010|10101010
45 // 10101010|10101010
46 // 10101010|10101010
47 // 10101010|10101010
48 // 10101010|10101010
49 // 10101010|10101010
50 //
51 // A loopfilter should be applied to every other 4x4 horizontally.
52
53 // 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
54 // We use 4 uint64_t to represent the 256 bit.
55 // Each 1 represents a position where we should apply a loop filter
56 // across the top border of an 4x4 block boundary.
57 //
58 // In the case of TX_8x8-> ( in low order byte first we end up with
59 // a mask that looks like this
60 //
61 // 11111111|11111111
62 // 00000000|00000000
63 // 11111111|11111111
64 // 00000000|00000000
65 // 11111111|11111111
66 // 00000000|00000000
67 // 11111111|11111111
68 // 00000000|00000000
69 // -----------------
70 // 11111111|11111111
71 // 00000000|00000000
72 // 11111111|11111111
73 // 00000000|00000000
74 // 11111111|11111111
75 // 00000000|00000000
76 // 11111111|11111111
77 // 00000000|00000000
78 //
79 // A loopfilter should be applied to every other 4x4 horizontally.
80 #if CONFIG_LPF_MASK
// The mask_id tables below map a block size (index: BLOCK_SIZES_ALL) to an
// entry in left_mask_univariant_reordered[] / above_mask_univariant_reordered[]
// for a given square transform size.  -1 marks a (block size, tx size)
// combination that cannot occur (tx larger than the block, or a 4:1 block
// shape with no entry for that tx size).
static const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18
};

static const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = {
  -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13
};

static const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = {
  -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8
};

static const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1,
  2,  3,  -1, -1, -1, -1, -1, -1, -1, -1, -1
};

// Maps a block size to the entry used for the block's outer vertical border.
// NOTE(review): entry values index the 67-entry univariant tables below
// (mostly the rectangular-tx entries) — verify against the mask builder.
static const int mask_id_table_vert_border[BLOCK_SIZES_ALL] = {
  0,  47, 49, 19, 51, 53, 33, 55, 57, 42, 59,
  60, 46, -1, -1, -1, 61, 62, 63, 64, 65, 66
};
101
// Each entry is a 256-bit mask over the 16x16 grid of 4x4 units in a 64x64
// area (4 uint64_t words, each covering 4 rows of 16 units, low bits first).
// A set bit means the LEFT edge of that 4x4 unit must be filtered for the
// given (block size, tx size) pair.  Entries are grouped by tx size and
// indexed through the mask_id tables above.
static const FilterMask left_mask_univariant_reordered[67] = {
  // TX_4X4
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X4, TX_4X4
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X8, TX_4X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X4, TX_4X4
  { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X8, TX_4X4
  { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_4X4
  { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
      0x00ff00ff00ff00ffULL } },  // block size 32X64, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
      0xffffffffffffffffULL } },  // block size 64X64, TX_4X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_4X4
  { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_4X4
  { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
      0x000f000f000f000fULL } },  // block size 16X64, TX_4X4
  { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_4X4
  // TX_8X8: left edges every second 4x4 column (bit pattern 0x5/0x55/0x5555).
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X8, TX_8X8
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_8X8
  { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_8X8
  { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_8X8
  { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_8X8
  { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_8X8
  { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_8X8
  { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL,
      0x0055005500550055ULL } },  // block size 32X64, TX_8X8
  { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_8X8
  { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL,
      0x5555555555555555ULL } },  // block size 64X64, TX_8X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X8
  { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_8X8
  { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL,
      0x0005000500050005ULL } },  // block size 16X64, TX_8X8
  { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_8X8
  // TX_16X16: left edges every fourth 4x4 column.
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_16X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_16X16
  { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_16X16
  { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_16X16
  { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL,
      0x0011001100110011ULL } },  // block size 32X64, TX_16X16
  { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_16X16
  { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL,
      0x1111111111111111ULL } },  // block size 64X64, TX_16X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } },  // block size 16X64, TX_16X16
  { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_16X16
  // TX_32X32: left edges every eighth 4x4 column.
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
      0x0101010101010101ULL } },  // block size 32X64, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
      0x0101010101010101ULL } },  // block size 64X64, TX_32X32
  // TX_64X64
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } },  // block size 64X64, TX_64X64
  // 2:1, 1:2 transform sizes.
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X8, TX_4X8
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X8
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X4, TX_8X4
  { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_8X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_8X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X16
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_16X8
  { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_16X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_16X32
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } },  // block size 16X64, TX_16X32
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_32X16
  { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_32X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } },  // block size 32X64, TX_32X64
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_64X32
  // 4:1, 1:4 transform sizes.
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X16
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_16X4
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X32
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_32X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } },  // block size 16X64, TX_16X64
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_64X16
};
245
// Same layout as left_mask_univariant_reordered, but a set bit means the
// TOP (above) edge of that 4x4 unit must be filtered for the given
// (block size, tx size) pair.  For square-tx entries the pattern repeats
// every tx-height rows instead of every tx-width columns.
static const FilterMask above_mask_univariant_reordered[67] = {
  // TX_4X4
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X4, TX_4X4
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X8, TX_4X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X4, TX_4X4
  { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X8, TX_4X4
  { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_4X4
  { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
      0x00ff00ff00ff00ffULL } },  // block size 32X64, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
      0xffffffffffffffffULL } },  // block size 64X64, TX_4X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_4X4
  { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_4X4
  { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
      0x000f000f000f000fULL } },  // block size 16X64, TX_4X4
  { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_4X4
  // TX_8X8: above edges on every second 4x4 row.
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X8, TX_8X8
  { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_8X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_8X8
  { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_8X8
  { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_8X8
  { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL,
      0x000000ff000000ffULL } },  // block size 32X64, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
      0x0000ffff0000ffffULL } },  // block size 64X64, TX_8X8
  { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X8
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL,
      0x0000000f0000000fULL } },  // block size 16X64, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_8X8
  // TX_16X16: above edges on every fourth 4x4 row.
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X16, TX_16X16
  { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_16X16
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_16X16
  { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_16X16
  { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL,
      0x00000000000000ffULL } },  // block size 32X64, TX_16X16
  { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_16X16
  { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL,
      0x000000000000ffffULL } },  // block size 64X64, TX_16X16
  { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL,
      0x000000000000000fULL } },  // block size 16X64, TX_16X16
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_16X16
  // TX_32X32: above edges on every eighth 4x4 row (words 0 and 2).
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X32, TX_32X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL,
      0x0000000000000000ULL } },  // block size 32X64, TX_32X32
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_32X32
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL,
      0x0000000000000000ULL } },  // block size 64X64, TX_32X32
  // TX_64X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X64, TX_64X64
  // 2:1, 1:2 transform sizes.
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X8, TX_4X8
  { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X8
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X4, TX_8X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_8X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X16, TX_8X16
  { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X16
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X8, TX_16X8
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_16X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X32, TX_16X32
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL,
      0x0000000000000000ULL } },  // block size 16X64, TX_16X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X16, TX_32X16
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_32X16
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X64, TX_32X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X32, TX_64X32
  // 4:1, 1:4 transform sizes.
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 4X16, TX_4X16
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X4, TX_16X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 8X32, TX_8X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 32X8, TX_32X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 16X64, TX_16X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } },  // block size 64X16, TX_64X16
};
389
get_loop_filter_mask(const AV1_COMMON * const cm,int mi_row,int mi_col)390 static LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm,
391 int mi_row, int mi_col) {
392 assert(cm->lf.lfm != NULL);
393 const int row = mi_row >> MIN_MIB_SIZE_LOG2; // 64x64
394 const int col = mi_col >> MIN_MIB_SIZE_LOG2;
395 return &cm->lf.lfm[row * cm->lf.lfm_stride + col];
396 }
397
// Function pointer type for an 8-bit loop filter applied to a single edge.
typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
                        const uint8_t *limit, const uint8_t *thresh);

// 8-bit filter applied to two edges at once, each with its own thresholds.
typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
                            const uint8_t *limit0, const uint8_t *thresh0,
                            const uint8_t *blimit1, const uint8_t *limit1,
                            const uint8_t *thresh1);

// High-bitdepth (uint16_t pixel) variant of LpfFunc; bd is the bit depth.
typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh, int bd);

// High-bitdepth variant of LpfDualFunc.
typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1, int bd);
// A 64x64 block is represented by 256 bits: one bit per 4x4 unit, stored
// in bitmask[4] where each uint64_t covers 4 rows of 16 units (low bits
// first within a row).
//
// Given a 4x4-unit position (mi_col, mi_row) inside the 64x64 block, this
// stores in *index which of the four 64-bit words holds the bit (mi_row / 4)
// and returns the bit position of (mi_col, mi_row) within that word.
int get_index_shift(int mi_col, int mi_row, int *index) {
  // Row offset within the selected word; 16 units per row => shift by 4.
  const int row_in_word = mi_row & 3;
  *index = mi_row >> 2;
  return (row_in_word << 4) | mi_col;
}
433
// Filters vertical (column) edges for two 4-pixel rows of a plane in one
// pass.  The *_0 masks and lfl levels describe the upper row of 4x4 units;
// the *_1 masks and lfl2 describe the row 4 pixels below (s + 4 * pitch).
// Each mask bit selects one 4x4 column position; mask_16x16/8x8/4x4 choose
// the filter length.  When both rows need the same-length filter at a
// column, the *_dual variant filters both edges in one call.
static void filter_selectively_vert_row2(
    int subsampling_factor, uint8_t *s, int pitch, int plane,
    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
  uint64_t mask;
  // For a subsampled (chroma) plane, consecutive units are 'step' bits apart.
  const int step = 1 << subsampling_factor;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
              mask_8x8_1 | mask_4x4_1;
       mask; mask >>= step) {
    // Thresholds for the upper (lfi0) and lower (lfi1) edge, selected by
    // the per-edge filter level.
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;

        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // Both rows need the long filter at this column: one dual call.
          if (plane) {
            aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else {
            aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                     lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                     lfi1->hev_thr);
          }
        } else if (mask_16x16_0 & 1) {
          lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          // Only the lower row: its edge starts 4 pixel rows below s.
          lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                       lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;

        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          if (plane) {
            aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else {
            aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          }
        } else if (mask_8x8_0 & 1) {
          lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                       lfi1->hev_thr);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }
    }

    // Advance one 4x4 unit (4 pixels) and shift all per-edge state.
    s += 4;
    lfl += step;
    lfl2 += step;
    mask_16x16_0 >>= step;
    mask_8x8_0 >>= step;
    mask_4x4_0 >>= step;
    mask_16x16_1 >>= step;
    mask_8x8_1 >>= step;
    mask_4x4_1 >>= step;
  }
}
520
521 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of filter_selectively_vert_row2: filters the
// vertical edges of two 4-pixel rows at once on uint16_t pixels, passing
// the bit depth 'bd' through to the aom_highbd_lpf_* kernels.  Masks,
// levels and stepping are identical to the 8-bit version.
static void highbd_filter_selectively_vert_row2(
    int subsampling_factor, uint16_t *s, int pitch, int plane,
    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
  uint64_t mask;
  // For a subsampled (chroma) plane, consecutive units are 'step' bits apart.
  const int step = 1 << subsampling_factor;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
              mask_8x8_1 | mask_4x4_1;
       mask; mask >>= step) {
    // Thresholds for the upper (lfi0) and lower (lfi1) edge.
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        HbdLpfFunc highbd_lpf_vertical =
            plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;

        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // Both rows need the long filter here: one dual call.
          if (plane) {
            aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          } else {
            aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                            lfi0->hev_thr, lfi1->mblim,
                                            lfi1->lim, lfi1->hev_thr, bd);
          }
        } else if (mask_16x16_0 & 1) {
          highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                              bd);
        } else {
          // Only the lower row: its edge starts 4 pixel rows below s.
          highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                              lfi1->hev_thr, bd);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        // Chroma uses the shorter 6-tap filter, luma the 8-tap.
        HbdLpfFunc highbd_lpf_vertical =
            plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;

        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          if (plane) {
            aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          } else {
            aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          }
        } else if (mask_8x8_0 & 1) {
          highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                              bd);
        } else {
          highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                              lfi1->hev_thr, bd);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_0 & 1) {
          aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, bd);
        }
      }
    }

    // Advance one 4x4 unit (4 pixels) and shift all per-edge state.
    s += 4;
    lfl += step;
    lfl2 += step;
    mask_16x16_0 >>= step;
    mask_8x8_0 >>= step;
    mask_4x4_0 >>= step;
    mask_16x16_1 >>= step;
    mask_8x8_1 >>= step;
    mask_4x4_1 >>= step;
  }
}
611 #endif // CONFIG_AV1_HIGHBITDEPTH
612
// Filters horizontal (row) edges along one row of 4x4 units.  Each mask
// bit selects one 4x4 column; mask_16x16/8x8/4x4 choose the filter length
// and lfl supplies the per-edge filter level.  When two adjacent units
// need the same-length filter, a *_dual call handles both edges and the
// loop advances by two units (count == 2).
static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
                                     int subsampling, uint64_t mask_16x16,
                                     uint64_t mask_8x8, uint64_t mask_4x4,
                                     const loop_filter_info_n *lfi_n,
                                     const uint8_t *lfl) {
  uint64_t mask;
  int count;
  // For a subsampled (chroma) plane, consecutive units are 'step' bits apart.
  const int step = 1 << subsampling;
  // Bit pattern covering two adjacent units: bits 0 and 'step'
  // (0b101 when subsampled, 0b11 otherwise).
  const unsigned int two_block_mask = subsampling ? 5 : 3;
  int offset = 0;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
    // Next block's thresholds, when it is within current 64x64 block.
    // If it is out of bound, its mask is zero, and it points to current edge's
    // filter parameters, instead of next edge's.
    int next_edge = step;
    if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
    const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_horizontal =
            plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;

        if ((mask_16x16 & two_block_mask) == two_block_mask) {
          // Two adjacent units both need this filter: one dual call.
          if (plane) {
            aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          } else {
            aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, lfin->mblim, lfin->lim,
                                       lfin->hev_thr);
          }
          count = 2;
        } else {
          lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_horizontal =
            plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;

        if ((mask_8x8 & two_block_mask) == two_block_mask) {
          if (plane) {
            aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          } else {
            aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          }
          count = 2;
        } else {
          lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & two_block_mask) == two_block_mask) {
          aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          count = 2;
        } else {
          aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      }
    }

    // Advance 'count' 4x4 units (4 pixels each) and shift all per-edge state.
    s += 4 * count;
    lfl += step * count;
    mask_16x16 >>= step * count;
    mask_8x8 >>= step * count;
    mask_4x4 >>= step * count;
    offset += step * count;
  }
}
695
696 #if CONFIG_AV1_HIGHBITDEPTH
highbd_filter_selectively_horiz(uint16_t * s,int pitch,int plane,int subsampling,uint64_t mask_16x16,uint64_t mask_8x8,uint64_t mask_4x4,const loop_filter_info_n * lfi_n,uint8_t * lfl,int bd)697 static void highbd_filter_selectively_horiz(
698 uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
699 uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
700 uint8_t *lfl, int bd) {
701 uint64_t mask;
702 int count;
703 const int step = 1 << subsampling;
704 const unsigned int two_block_mask = subsampling ? 5 : 3;
705 int offset = 0;
706
707 for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
708 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
709 // Next block's thresholds, when it is within current 64x64 block.
710 // If it is out of bound, its mask is zero, and it points to current edge's
711 // filter parameters, instead of next edge's.
712 int next_edge = step;
713 if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
714 const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);
715
716 count = 1;
717 if (mask & 1) {
718 if (mask_16x16 & 1) {
719 HbdLpfFunc highbd_lpf_horizontal =
720 plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;
721
722 if ((mask_16x16 & two_block_mask) == two_block_mask) {
723 if (plane) {
724 aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
725 lfi->hev_thr, lfin->mblim,
726 lfin->lim, lfin->hev_thr, bd);
727 } else {
728 aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
729 lfi->hev_thr, lfin->mblim,
730 lfin->lim, lfin->hev_thr, bd);
731 }
732 count = 2;
733 } else {
734 highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
735 bd);
736 }
737 } else if (mask_8x8 & 1) {
738 HbdLpfFunc highbd_lpf_horizontal =
739 plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;
740
741 if ((mask_8x8 & two_block_mask) == two_block_mask) {
742 if (plane) {
743 aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
744 lfi->hev_thr, lfin->mblim,
745 lfin->lim, lfin->hev_thr, bd);
746 } else {
747 aom_highbd_lpf_horizontal_8_dual_c(s, pitch, lfi->mblim, lfi->lim,
748 lfi->hev_thr, lfin->mblim,
749 lfin->lim, lfin->hev_thr, bd);
750 }
751 count = 2;
752 } else {
753 highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
754 bd);
755 }
756 } else if (mask_4x4 & 1) {
757 if ((mask_4x4 & two_block_mask) == two_block_mask) {
758 aom_highbd_lpf_horizontal_4_dual_c(s, pitch, lfi->mblim, lfi->lim,
759 lfi->hev_thr, lfin->mblim,
760 lfin->lim, lfin->hev_thr, bd);
761 count = 2;
762 } else {
763 aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
764 lfi->hev_thr, bd);
765 }
766 }
767 }
768
769 s += 4 * count;
770 lfl += step * count;
771 mask_16x16 >>= step * count;
772 mask_8x8 >>= step * count;
773 mask_4x4 >>= step * count;
774 offset += step * count;
775 }
776 }
777 #endif // CONFIG_AV1_HIGHBITDEPTH
778
// Scans the whole plane in raster order and, for every 4x4 unit whose left
// (vertical) edge must be deblocked, sets the corresponding bit in the
// left_y/left_u/left_v bitmask of the covering 64x64 LoopFilterMask. The
// filter length for an edge is the minimum of the transform widths on its
// two sides, capped at 16. Also propagates a nonzero filter level across
// edges where the current block's level is 0 but the previous block's is not.
void av1_build_bitmask_vert_info(
    AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
    int plane) {
  const int subsampling_x = plane_ptr->subsampling_x;
  const int subsampling_y = plane_ptr->subsampling_y;
  const int is_uv = plane > 0;
  // Running state carried across units in scan order; prev_level starts
  // nonzero but the first column is excluded by the c + col_in_unit > 0 test.
  TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
  uint8_t level, prev_level = 1;
  uint64_t skip, prev_skip = 0;
  uint64_t is_coding_block_border;

  // r/c iterate over the plane in (subsampled) 4x4 units; mi_row/mi_col are
  // the corresponding luma mi coordinates.
  for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r++) {
    const int mi_row = r << subsampling_y;
    const int row = mi_row % MI_SIZE_64X64;
    // For subsampled chroma, index the level arrays at the odd row of the
    // luma-row pair (row | 1); for luma this is just `row`.
    const int row_uv = row | subsampling_y;
    int index = 0;
    const int shift = get_index_shift(0, row, &index);

    // Step the column loop one 64x64 LoopFilterMask at a time.
    for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width;
         c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) {
      const int mi_col = c << subsampling_x;
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);

      // Walk the units inside this 64x64, advancing by the transform width.
      for (int col_in_unit = 0;
           col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) {
        const int x = (c + col_in_unit) << MI_SIZE_LOG2;
        if (x >= plane_ptr->dst.width) break;
        const int col = col_in_unit << subsampling_x;
        const int col_uv = col | subsampling_x;
        const uint64_t mask = ((uint64_t)1 << (shift | col));
        skip = lfm->skip.bits[index] & mask;
        is_coding_block_border = lfm->is_vert_border.bits[index] & mask;
        switch (plane) {
          case 0: level = lfm->lfl_y_ver[row_uv][col_uv]; break;
          case 1: level = lfm->lfl_u_ver[row_uv][col_uv]; break;
          case 2: level = lfm->lfl_v_ver[row_uv][col_uv]; break;
          default: assert(plane >= 0 && plane <= 2); return;
        }
        // Find the smallest tx size whose bit is set for this unit. If no
        // bit is set, tx_size keeps its value from the previous unit.
        for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
          if (is_uv && ts == TX_64X64) continue;
          if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) {
            tx_size = ts;
            break;
          }
        }
        // Filter this edge unless: it is the picture's left edge, both
        // sides have level 0, or both sides are skipped inter blocks within
        // a single coding block (no coding-block border between them).
        if ((c + col_in_unit > 0) && (level || prev_level) &&
            (!prev_skip || !skip || is_coding_block_border)) {
          // Filter length = min of the two adjacent tx widths, capped at 16.
          const TX_SIZE min_tx_size =
              AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
          const int shift_1 = get_index_shift(col_uv, row_uv, &index);
          const uint64_t mask_1 = ((uint64_t)1 << shift_1);
          switch (plane) {
            case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break;
            case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break;
            case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break;
            default: assert(plane >= 0 && plane <= 2); return;
          }
          // If this block's level is 0 but the previous block's is not,
          // reuse the previous level so the shared edge still gets filtered.
          if (level == 0 && prev_level != 0) {
            switch (plane) {
              case 0: lfm->lfl_y_ver[row_uv][col_uv] = prev_level; break;
              case 1: lfm->lfl_u_ver[row_uv][col_uv] = prev_level; break;
              case 2: lfm->lfl_v_ver[row_uv][col_uv] = prev_level; break;
              default: assert(plane >= 0 && plane <= 2); return;
            }
          }
        }

        // update prev info
        prev_level = level;
        prev_skip = skip;
        prev_tx_size = tx_size;
        // advance
        col_in_unit += tx_size_wide_unit[tx_size];
      }
    }
  }
}
856
// Column-major counterpart of av1_build_bitmask_vert_info(): scans the plane
// and, for every 4x4 unit whose top (horizontal) edge must be deblocked,
// sets the corresponding bit in the above_y/above_u/above_v bitmask of the
// covering 64x64 LoopFilterMask. The filter length for an edge is the
// minimum of the transform heights on its two sides, capped at 16. Also
// propagates a nonzero filter level across edges where the current block's
// level is 0 but the block above's is not.
void av1_build_bitmask_horz_info(
    AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
    int plane) {
  const int subsampling_x = plane_ptr->subsampling_x;
  const int subsampling_y = plane_ptr->subsampling_y;
  const int is_uv = plane > 0;
  // Running state carried down each column; prev_level starts nonzero but
  // the first row is excluded by the r + r_in_unit > 0 test.
  TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
  uint8_t level, prev_level = 1;
  uint64_t skip, prev_skip = 0;
  uint64_t is_coding_block_border;

  // c/r iterate over the plane in (subsampled) 4x4 units; mi_col/mi_row are
  // the corresponding luma mi coordinates.
  for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c++) {
    const int mi_col = c << subsampling_x;
    const int col = mi_col % MI_SIZE_64X64;
    // For subsampled chroma, index the level arrays at the odd column of the
    // luma-column pair (col | 1); for luma this is just `col`.
    const int col_uv = col | subsampling_x;

    // Step the row loop one 64x64 LoopFilterMask at a time.
    for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height;
         r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) {
      const int mi_row = r << subsampling_y;
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);

      // Walk the units inside this 64x64, advancing by the transform height.
      for (int r_in_unit = 0;
           r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) {
        const int y = (r + r_in_unit) << MI_SIZE_LOG2;
        if (y >= plane_ptr->dst.height) break;
        const int row = r_in_unit << subsampling_y;
        const int row_uv = row | subsampling_y;
        int index = 0;
        const int shift = get_index_shift(col, row, &index);
        const uint64_t mask = ((uint64_t)1 << shift);
        skip = lfm->skip.bits[index] & mask;
        is_coding_block_border = lfm->is_horz_border.bits[index] & mask;
        switch (plane) {
          case 0: level = lfm->lfl_y_hor[row_uv][col_uv]; break;
          case 1: level = lfm->lfl_u_hor[row_uv][col_uv]; break;
          case 2: level = lfm->lfl_v_hor[row_uv][col_uv]; break;
          default: assert(plane >= 0 && plane <= 2); return;
        }
        // Find the smallest tx size whose bit is set for this unit. If no
        // bit is set, tx_size keeps its value from the previous unit.
        for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
          if (is_uv && ts == TX_64X64) continue;
          if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) {
            tx_size = ts;
            break;
          }
        }
        // Filter this edge unless: it is the picture's top edge, both sides
        // have level 0, or both sides are skipped inter blocks within a
        // single coding block (no coding-block border between them).
        if ((r + r_in_unit > 0) && (level || prev_level) &&
            (!prev_skip || !skip || is_coding_block_border)) {
          // Filter length = min of the two adjacent tx heights, capped at 16.
          const TX_SIZE min_tx_size =
              AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
          const int shift_1 = get_index_shift(col_uv, row_uv, &index);
          const uint64_t mask_1 = ((uint64_t)1 << shift_1);

          switch (plane) {
            case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break;
            case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break;
            case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break;
            default: assert(plane >= 0 && plane <= 2); return;
          }
          // If this block's level is 0 but the block above's is not, reuse
          // the previous level so the shared edge still gets filtered.
          if (level == 0 && prev_level != 0) {
            switch (plane) {
              case 0: lfm->lfl_y_hor[row_uv][col_uv] = prev_level; break;
              case 1: lfm->lfl_u_hor[row_uv][col_uv] = prev_level; break;
              case 2: lfm->lfl_v_hor[row_uv][col_uv] = prev_level; break;
              default: assert(plane >= 0 && plane <= 2); return;
            }
          }
        }

        // update prev info
        prev_level = level;
        prev_skip = skip;
        prev_tx_size = tx_size;
        // advance
        r_in_unit += tx_size_high_unit[tx_size];
      }
    }
  }
}
935
// Applies vertical-edge (left-border) loop filtering for one plane over a
// 64x64 region, using the bitmasks precomputed in its LoopFilterMask.
// Processes two rows of 4x4 units per iteration so the dual (two-row)
// filter kernels can be used.
//
// cm        - codec common state (frame size, lf thresholds, bit depth).
// plane_ptr - plane descriptor; dst.buf is advanced/restored internally.
// pl        - plane index: 0 = Y, 1 = U, 2 = V.
// mi_row/mi_col - mi coordinates of the 64x64 region's top-left corner.
void av1_filter_block_plane_bitmask_vert(
    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
    int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  uint8_t *const buf0 = dst->buf;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // One row of a 64x64 region holds 16 4x4 units -> low 16 mask bits.
  const int mask_cutoff = 0xffff;
  const int row_step = 1 << ssy;
  const int two_row_step = 2 << ssy;
  const int row_stride = dst->stride << MI_SIZE_LOG2;
  const int two_row_stride = row_stride << 1;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  uint8_t *lfl2;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  assert(lfm);

  // 1. vertical filtering. filter two rows at a time
  for (int r = 0;
       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
       r += two_row_step) {
    // For subsampled chroma, address the odd row/col of each luma pair.
    const int row = r | ssy;
    const int row_next = row + row_step;
    const int col = ssx;
    int index = 0;
    const int shift = get_index_shift(col, row, &index);
    int index_next = 0;
    const int shift_next = get_index_shift(col, row_next, &index_next);
    const int has_next_row = row_next < cm->mi_params.mi_rows;
    switch (pl) {
      case 0:
        mask_16x16 = lfm->left_y[TX_16X16].bits[index];
        mask_8x8 = lfm->left_y[TX_8X8].bits[index];
        mask_4x4 = lfm->left_y[TX_4X4].bits[index];
        lfl = &lfm->lfl_y_ver[row][col];
        lfl2 = &lfm->lfl_y_ver[row_next][col];
        break;
      case 1:
        mask_16x16 = lfm->left_u[TX_16X16].bits[index];
        mask_8x8 = lfm->left_u[TX_8X8].bits[index];
        mask_4x4 = lfm->left_u[TX_4X4].bits[index];
        lfl = &lfm->lfl_u_ver[row][col];
        lfl2 = &lfm->lfl_u_ver[row_next][col];
        break;
      case 2:
        mask_16x16 = lfm->left_v[TX_16X16].bits[index];
        mask_8x8 = lfm->left_v[TX_8X8].bits[index];
        mask_4x4 = lfm->left_v[TX_4X4].bits[index];
        lfl = &lfm->lfl_v_ver[row][col];
        lfl2 = &lfm->lfl_v_ver[row_next][col];
        break;
      default: assert(pl >= 0 && pl <= 2); return;
    }
    // Extract one 16-bit row of mask bits for each of the two rows.
    uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
    uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
    uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
    uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
    uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
    uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
    // Below the picture: suppress the second row entirely.
    if (!has_next_row) {
      mask_16x16_1 = 0;
      mask_8x8_1 = 0;
      mask_4x4_1 = 0;
    }

#if CONFIG_AV1_HIGHBITDEPTH
    if (cm->seq_params.use_highbitdepth)
      highbd_filter_selectively_vert_row2(
          ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
          mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
          &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
    else
      filter_selectively_vert_row2(
          ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
          mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
#else
    filter_selectively_vert_row2(
        ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
        mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
#endif
    dst->buf += two_row_stride;
  }
  // reset buf pointer for horizontal filtering
  dst->buf = buf0;
}
1024
// Applies horizontal-edge (top-border) loop filtering for one plane over a
// 64x64 region, using the bitmasks precomputed in its LoopFilterMask.
// Processes one row of 4x4 units per iteration.
//
// cm        - codec common state (frame size, lf thresholds, bit depth).
// plane_ptr - plane descriptor; dst.buf is advanced/restored internally.
// pl        - plane index: 0 = Y, 1 = U, 2 = V.
// mi_row/mi_col - mi coordinates of the 64x64 region's top-left corner.
void av1_filter_block_plane_bitmask_horz(
    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
    int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  uint8_t *const buf0 = dst->buf;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // One row of a 64x64 region holds 16 4x4 units -> low 16 mask bits.
  const int mask_cutoff = 0xffff;
  const int row_step = 1 << ssy;
  const int row_stride = dst->stride << MI_SIZE_LOG2;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  assert(lfm);
  for (int r = 0;
       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
       r += row_step) {
    // Never filter across the top edge of the picture; still advance the
    // buffer to the next row.
    if (mi_row + r == 0) {
      dst->buf += row_stride;
      continue;
    }
    // For subsampled chroma, address the odd row/col of each luma pair.
    const int row = r | ssy;
    const int col = ssx;
    int index = 0;
    const int shift = get_index_shift(col, row, &index);
    switch (pl) {
      case 0:
        mask_16x16 = lfm->above_y[TX_16X16].bits[index];
        mask_8x8 = lfm->above_y[TX_8X8].bits[index];
        mask_4x4 = lfm->above_y[TX_4X4].bits[index];
        lfl = &lfm->lfl_y_hor[row][col];
        break;
      case 1:
        mask_16x16 = lfm->above_u[TX_16X16].bits[index];
        mask_8x8 = lfm->above_u[TX_8X8].bits[index];
        mask_4x4 = lfm->above_u[TX_4X4].bits[index];
        lfl = &lfm->lfl_u_hor[row][col];
        break;
      case 2:
        mask_16x16 = lfm->above_v[TX_16X16].bits[index];
        mask_8x8 = lfm->above_v[TX_8X8].bits[index];
        mask_4x4 = lfm->above_v[TX_4X4].bits[index];
        lfl = &lfm->lfl_v_hor[row][col];
        break;
      default: assert(pl >= 0 && pl <= 2); return;
    }
    // Extract the 16-bit row of mask bits for this row of units.
    mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
    mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
    mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;

#if CONFIG_AV1_HIGHBITDEPTH
    if (cm->seq_params.use_highbitdepth)
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
          mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->seq_params.bit_depth);
    else
      filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                               mask_8x8, mask_4x4, &cm->lf_info, lfl);
#else
    filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                             mask_8x8, mask_4x4, &cm->lf_info, lfl);
#endif
    dst->buf += row_stride;
  }
  // reset buf pointer for next block
  dst->buf = buf0;
}
1094
// Applies vertical-edge loop filtering for one plane over a whole superblock
// (cm->seq_params.mib_size mi units), stepping through the 64x64
// LoopFilterMask units it covers. Filters two rows of 4x4 units per
// iteration so the dual (two-row) filter kernels can be used.
//
// cm        - codec common state (frame size, lf thresholds, bit depth).
// plane_ptr - plane descriptor; dst.buf is advanced per row and restored
//             per column offset internally.
// pl        - plane index: 0 = Y, 1 = U, 2 = V.
// mi_row/mi_col - mi coordinates of the superblock's top-left corner.
void av1_filter_block_plane_ver(AV1_COMMON *const cm,
                                struct macroblockd_plane *const plane_ptr,
                                int pl, int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  int r, c;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // One row of a 64x64 region holds 16 4x4 units -> low 16 mask bits.
  const int mask_cutoff = 0xffff;
  const int single_step = 1 << ssy;
  const int r_step = 2 << ssy;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  uint8_t *lfl2;

  // filter two rows at a time
  for (r = 0; r < cm->seq_params.mib_size &&
              ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
       r += r_step) {
    for (c = 0; c < cm->seq_params.mib_size &&
                ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
         c += MI_SIZE_64X64) {
      // Advance to this 64x64 column; undone at the end of the iteration.
      dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
      assert(lfm);
      // Position within the 64x64 unit; for subsampled chroma, address the
      // odd row/col of each luma pair.
      const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
      const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
      int index = 0;
      const int shift = get_index_shift(col, row, &index);
      // current and next row should belong to the same mask_idx and index
      // next row's shift
      const int row_next = row + single_step;
      int index_next = 0;
      const int shift_next = get_index_shift(col, row_next, &index_next);
      switch (pl) {
        case 0:
          mask_16x16 = lfm->left_y[TX_16X16].bits[index];
          mask_8x8 = lfm->left_y[TX_8X8].bits[index];
          mask_4x4 = lfm->left_y[TX_4X4].bits[index];
          lfl = &lfm->lfl_y_ver[row][col];
          lfl2 = &lfm->lfl_y_ver[row_next][col];
          break;
        case 1:
          mask_16x16 = lfm->left_u[TX_16X16].bits[index];
          mask_8x8 = lfm->left_u[TX_8X8].bits[index];
          mask_4x4 = lfm->left_u[TX_4X4].bits[index];
          lfl = &lfm->lfl_u_ver[row][col];
          lfl2 = &lfm->lfl_u_ver[row_next][col];
          break;
        case 2:
          mask_16x16 = lfm->left_v[TX_16X16].bits[index];
          mask_8x8 = lfm->left_v[TX_8X8].bits[index];
          mask_4x4 = lfm->left_v[TX_4X4].bits[index];
          lfl = &lfm->lfl_v_ver[row][col];
          lfl2 = &lfm->lfl_v_ver[row_next][col];
          break;
        default: assert(pl >= 0 && pl <= 2); return;
      }
      // Extract one 16-bit row of mask bits for each of the two rows.
      uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
      uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
      uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
      uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
      uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
      uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;

#if CONFIG_AV1_HIGHBITDEPTH
      if (cm->seq_params.use_highbitdepth)
        highbd_filter_selectively_vert_row2(
            ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
            mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
            &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
      else
        filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
                                     mask_16x16_0, mask_8x8_0, mask_4x4_0,
                                     mask_16x16_1, mask_8x8_1, mask_4x4_1,
                                     &cm->lf_info, lfl, lfl2);
#else
      filter_selectively_vert_row2(
          ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
          mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
#endif
      dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
    }
    // Two rows of 4x4 units were filtered; advance two 4-pixel rows.
    dst->buf += 2 * MI_SIZE * dst->stride;
  }
}
1182
// Applies horizontal-edge loop filtering for one plane over a whole
// superblock (cm->seq_params.mib_size mi units), stepping through the 64x64
// LoopFilterMask units it covers, one row of 4x4 units per iteration.
//
// cm        - codec common state (frame size, lf thresholds, bit depth).
// plane_ptr - plane descriptor; dst.buf is advanced per row and restored
//             per column offset internally.
// pl        - plane index: 0 = Y, 1 = U, 2 = V.
// mi_row/mi_col - mi coordinates of the superblock's top-left corner.
void av1_filter_block_plane_hor(AV1_COMMON *const cm,
                                struct macroblockd_plane *const plane_ptr,
                                int pl, int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  int r, c;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // One row of a 64x64 region holds 16 4x4 units -> low 16 mask bits.
  const int mask_cutoff = 0xffff;
  const int r_step = 1 << ssy;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;

  for (r = 0; r < cm->seq_params.mib_size &&
              ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
       r += r_step) {
    // Never filter across the top edge of the picture. The test does not
    // depend on c, so it is hoisted out of the column loop (it used to be
    // re-evaluated for every column); this matches the structure of
    // av1_filter_block_plane_bitmask_horz(). The row-advance of dst->buf
    // below the loop is still applied.
    if (mi_row + r == 0) {
      dst->buf += MI_SIZE * dst->stride;
      continue;
    }
    for (c = 0; c < cm->seq_params.mib_size &&
                ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
         c += MI_SIZE_64X64) {
      // Advance to this 64x64 column; undone at the end of the iteration.
      dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
      assert(lfm);
      // Position within the 64x64 unit; for subsampled chroma, address the
      // odd row/col of each luma pair.
      const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
      const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
      int index = 0;
      const int shift = get_index_shift(col, row, &index);
      switch (pl) {
        case 0:
          mask_16x16 = lfm->above_y[TX_16X16].bits[index];
          mask_8x8 = lfm->above_y[TX_8X8].bits[index];
          mask_4x4 = lfm->above_y[TX_4X4].bits[index];
          lfl = &lfm->lfl_y_hor[row][col];
          break;
        case 1:
          mask_16x16 = lfm->above_u[TX_16X16].bits[index];
          mask_8x8 = lfm->above_u[TX_8X8].bits[index];
          mask_4x4 = lfm->above_u[TX_4X4].bits[index];
          lfl = &lfm->lfl_u_hor[row][col];
          break;
        case 2:
          mask_16x16 = lfm->above_v[TX_16X16].bits[index];
          mask_8x8 = lfm->above_v[TX_8X8].bits[index];
          mask_4x4 = lfm->above_v[TX_4X4].bits[index];
          lfl = &lfm->lfl_v_hor[row][col];
          break;
        default: assert(pl >= 0 && pl <= 2); return;
      }
      // Extract the 16-bit row of mask bits for this row of units.
      mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
      mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
      mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;

#if CONFIG_AV1_HIGHBITDEPTH
      if (cm->seq_params.use_highbitdepth)
        highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                        dst->stride, pl, ssx, mask_16x16,
                                        mask_8x8, mask_4x4, &cm->lf_info, lfl,
                                        (int)cm->seq_params.bit_depth);
      else
        filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                                 mask_8x8, mask_4x4, &cm->lf_info, lfl);
#else
      filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                               mask_8x8, mask_4x4, &cm->lf_info, lfl);
#endif
      dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
    }
    dst->buf += MI_SIZE * dst->stride;
  }
}
1255
// Records the transform-size bitmasks for one variable-tx block: for each
// plane group (0 = Y, 1 = U/V) the per-4x4-unit "left" and "above"
// transform-size bits are OR-ed into the 64x64 LoopFilterMask covering
// (mi_row, mi_col), using a precomputed mask selected by block size and
// transform size.
void av1_store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             MB_MODE_INFO *mbmi) {
  LoopFilterMask *const lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const TX_SIZE y_vert_tx = txsize_vert_map[tx_size];
  const TX_SIZE y_horz_tx = txsize_horz_map[tx_size];
  const TX_SIZE uv_vert_tx = txsize_vert_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const TX_SIZE uv_horz_tx = txsize_horz_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  int mask_idx;
  if (tx_size <= TX_64X64) {
    // Square transforms: per-tx-size lookup table, each with its own base
    // offset into the reordered mask tables.
    if (tx_size == TX_4X4) {
      mask_idx = mask_id_table_tx_4x4[bsize];
    } else if (tx_size == TX_8X8) {
      mask_idx = mask_id_table_tx_8x8[bsize] + 19;
    } else if (tx_size == TX_16X16) {
      mask_idx = mask_id_table_tx_16x16[bsize] + 33;
    } else if (tx_size == TX_32X32) {
      mask_idx = mask_id_table_tx_32x32[bsize] + 42;
    } else if (tx_size == TX_64X64) {
      mask_idx = 46;
    } else {
      assert(0);
      return;
    }
  } else if (tx_size <= TX_32X16) {
    // 1:2 / 2:1 rectangular transforms up to 32 on the long side; a second
    // mask variant is used when the transform does not fill the block.
    const int tx_fills_block = (bsize == txsize_to_bsize[tx_size]);
    mask_idx = 47 + 2 * (tx_size - TX_4X8) + (tx_fills_block ? 0 : 1);
  } else if (tx_size == TX_32X64) {
    mask_idx = 59;
  } else if (tx_size == TX_64X32) {
    mask_idx = 60;
  } else {
    // 1:4 / 4:1 rectangular transforms.
    mask_idx = 61 + (tx_size - TX_4X16);
  }
  int word = 0;
  const int row_in_sb = mi_row % MI_SIZE_64X64;
  const int col_in_sb = mi_col % MI_SIZE_64X64;
  const int bit_shift = get_index_shift(col_in_sb, row_in_sb, &word);
  // NOTE(review): vertical masks are shifted by column only for tx heights
  // above 8, presumably matching the layout of the precomputed left masks
  // -- confirm against left_mask_univariant_reordered.
  const int vert_bit_shift = (y_vert_tx <= TX_8X8) ? bit_shift : col_in_sb;
  for (int i = 0; i + word < 4; ++i) {
    const uint64_t left_bits = left_mask_univariant_reordered[mask_idx].bits[i];
    const uint64_t above_bits =
        above_mask_univariant_reordered[mask_idx].bits[i];
    // y vertical.
    lfm->tx_size_ver[0][y_horz_tx].bits[word + i] |=
        (left_bits << vert_bit_shift);
    // y horizontal.
    lfm->tx_size_hor[0][y_vert_tx].bits[word + i] |= (above_bits << bit_shift);
    // u/v vertical.
    lfm->tx_size_ver[1][uv_horz_tx].bits[word + i] |=
        (left_bits << vert_bit_shift);
    // u/v horizontal.
    lfm->tx_size_hor[1][uv_vert_tx].bits[word + i] |= (above_bits << bit_shift);
  }
}
1322
// Records the transform-size bitmasks for a block that uses a single
// (univariant) transform size, mbmi->tx_size, for its whole area. Mirrors
// av1_store_bitmask_vartx(), which takes the transform size as an explicit
// parameter for the variable-tx case.
void av1_store_bitmask_univariant_tx(AV1_COMMON *cm, int mi_row, int mi_col,
                                     BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) {
  // Use a lookup table that provides one bitmask for a given block size and
  // a univariant transform size.
  int index;
  int shift;
  int row;
  int col;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const TX_SIZE tx_size_y_vert = txsize_vert_map[mbmi->tx_size];
  const TX_SIZE tx_size_y_horz = txsize_horz_map[mbmi->tx_size];
  const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const int is_square_transform_size = mbmi->tx_size <= TX_64X64;
  int mask_id = 0;
  // Base offset into the reordered mask tables for each square tx size.
  int offset = 0;
  const int half_ratio_tx_size_max32 =
      (mbmi->tx_size > TX_64X64) & (mbmi->tx_size <= TX_32X16);
  if (is_square_transform_size) {
    switch (mbmi->tx_size) {
      case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break;
      case TX_8X8:
        mask_id = mask_id_table_tx_8x8[bsize];
        offset = 19;
        break;
      case TX_16X16:
        mask_id = mask_id_table_tx_16x16[bsize];
        offset = 33;
        break;
      case TX_32X32:
        mask_id = mask_id_table_tx_32x32[bsize];
        offset = 42;
        break;
      case TX_64X64: mask_id = 46; break;
      default: assert(!is_square_transform_size); return;
    }
    mask_id += offset;
  } else if (half_ratio_tx_size_max32) {
    // 1:2 / 2:1 rectangular transforms up to 32 on the long side; a second
    // mask variant is used when the transform does not fill the block.
    int tx_size_equal_block_size = bsize == txsize_to_bsize[mbmi->tx_size];
    mask_id =
        47 + 2 * (mbmi->tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1);
  } else if (mbmi->tx_size == TX_32X64) {
    mask_id = 59;
  } else if (mbmi->tx_size == TX_64X32) {
    mask_id = 60;
  } else {  // quarter ratio tx size
    mask_id = 61 + (mbmi->tx_size - TX_4X16);
  }
  row = mi_row % MI_SIZE_64X64;
  col = mi_col % MI_SIZE_64X64;
  shift = get_index_shift(col, row, &index);
  // NOTE(review): vertical masks are shifted by column only for tx heights
  // above 8, presumably matching the layout of the precomputed left masks
  // -- confirm against left_mask_univariant_reordered.
  const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col;
  // OR the selected precomputed mask into all four per-plane-group maps,
  // one 64-bit word at a time starting at the block's word `index`.
  for (int i = 0; i + index < 4; ++i) {
    // y vertical.
    lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // y horizontal.
    lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
    // u/v vertical.
    lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // u/v horizontal.
    lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
  }
}
1394
// Records the remaining per-block loop-filter info into the 64x64
// LoopFilterMask covering (mi_row, mi_col): coding-block border bits
// (horizontal and vertical), the skip bitmask for skipped inter blocks, and
// the per-4x4-unit filter levels for all planes and both edge directions.
//
// Fix: removed the dead store `index = 0;` at the top of the level-memset
// loop -- `index` is never read inside that loop or after it.
void av1_store_bitmask_other_info(AV1_COMMON *cm, int mi_row, int mi_col,
                                  BLOCK_SIZE bsize, MB_MODE_INFO *mbmi,
                                  int is_horz_coding_block_border,
                                  int is_vert_coding_block_border) {
  int index;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const int row_start = mi_row % MI_SIZE_64X64;
  const int col_start = mi_col % MI_SIZE_64X64;
  const int shift = get_index_shift(col_start, row_start, &index);
  if (is_horz_coding_block_border) {
    // Build a run of mi_size_wide[bsize] consecutive bits starting at
    // `shift` and mark them as a horizontal coding-block border.
    const int block_shift = shift + mi_size_wide[bsize];
    assert(block_shift <= 64);
    const uint64_t right_edge_shift =
        (block_shift == 64) ? 0xffffffffffffffff : ((uint64_t)1 << block_shift);
    const uint64_t left_edge_shift = (block_shift == 64)
                                         ? (((uint64_t)1 << shift) - 1)
                                         : ((uint64_t)1 << shift);
    assert(right_edge_shift > left_edge_shift);
    const uint64_t top_edge_mask = right_edge_shift - left_edge_shift;
    lfm->is_horz_border.bits[index] |= top_edge_mask;
  }
  if (is_vert_coding_block_border) {
    const int is_vert_border = mask_id_table_vert_border[bsize];
    // Tall blocks shift the precomputed mask by column only; short ones by
    // the full bit position (same convention as the tx-size masks).
    const int vert_shift = block_size_high[bsize] <= 8 ? shift : col_start;
    for (int i = 0; i + index < 4; ++i) {
      lfm->is_vert_border.bits[i + index] |=
          (left_mask_univariant_reordered[is_vert_border].bits[i]
           << vert_shift);
    }
  }
  // Skip bits are only set for skipped *inter* blocks; intra blocks are
  // always filtered.
  const int is_skip = mbmi->skip && is_inter_block(mbmi);
  if (is_skip) {
    const int is_skip_mask = mask_id_table_tx_4x4[bsize];
    for (int i = 0; i + index < 4; ++i) {
      lfm->skip.bits[i + index] |=
          (above_mask_univariant_reordered[is_skip_mask].bits[i] << shift);
    }
  }
  // Filter levels: luma keeps separate levels per edge direction (dir 0 =
  // vertical, dir 1 = horizontal); each chroma plane uses one level for
  // both directions, as the duplicated memsets below show.
  const uint8_t level_vert_y =
      av1_get_filter_level(cm, &cm->lf_info, 0, 0, mbmi);
  const uint8_t level_horz_y =
      av1_get_filter_level(cm, &cm->lf_info, 1, 0, mbmi);
  const uint8_t level_u = av1_get_filter_level(cm, &cm->lf_info, 0, 1, mbmi);
  const uint8_t level_v = av1_get_filter_level(cm, &cm->lf_info, 0, 2, mbmi);
  for (int r = mi_row; r < mi_row + mi_size_high[bsize]; r++) {
    const int row = r % MI_SIZE_64X64;
    memset(&lfm->lfl_y_ver[row][col_start], level_vert_y,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_y_hor[row][col_start], level_horz_y,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_u_ver[row][col_start], level_u,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_u_hor[row][col_start], level_u,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_v_ver[row][col_start], level_v,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_v_hor[row][col_start], level_v,
           sizeof(uint8_t) * mi_size_wide[bsize]);
  }
}
1458 #endif // CONFIG_LPF_MASK
1459