1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/av1_loopfilter.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24 
25 // 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
26 // We use 4 uint64_t to represent the 256 bits.
27 // Each 1 represents a position where we should apply a loop filter
28 // across the left border of a 4x4 block boundary.
29 //
30 // In the case of TX_8X8, with the low-order byte first, we end up with
31 // a mask that looks like this (-- and | are used for better view):
32 //
33 //    10101010|10101010
34 //    10101010|10101010
35 //    10101010|10101010
36 //    10101010|10101010
37 //    10101010|10101010
38 //    10101010|10101010
39 //    10101010|10101010
40 //    10101010|10101010
41 //    -----------------
42 //    10101010|10101010
43 //    10101010|10101010
44 //    10101010|10101010
45 //    10101010|10101010
46 //    10101010|10101010
47 //    10101010|10101010
48 //    10101010|10101010
49 //    10101010|10101010
50 //
51 // A loopfilter should be applied to every other 4x4 horizontally.
52 
53 // 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
54 // We use 4 uint64_t to represent the 256 bits.
55 // Each 1 represents a position where we should apply a loop filter
56 // across the top border of a 4x4 block boundary.
57 //
58 // In the case of TX_8X8, with the low-order byte first, we end up with
59 // a mask that looks like this:
60 //
61 //    11111111|11111111
62 //    00000000|00000000
63 //    11111111|11111111
64 //    00000000|00000000
65 //    11111111|11111111
66 //    00000000|00000000
67 //    11111111|11111111
68 //    00000000|00000000
69 //    -----------------
70 //    11111111|11111111
71 //    00000000|00000000
72 //    11111111|11111111
73 //    00000000|00000000
74 //    11111111|11111111
75 //    00000000|00000000
76 //    11111111|11111111
77 //    00000000|00000000
78 //
79 // A loopfilter should be applied to every other 4x4 row, i.e. vertically.
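//
// The sketch below is illustrative only (guarded by #if 0, never compiled):
// it shows, under the layout just described, how the (row, col) position of a
// 4x4 unit inside a 64x64 area maps onto the four 64-bit words of a
// FilterMask, and how the TX_8X8 left-edge pattern yields the alternating
// 0x5555555555555555ULL words found in the tables further down. The helper
// name is a documentation-only assumption, not part of the library.
#if 0
static FilterMask example_left_mask_tx_8x8(void) {
  FilterMask fm = { { 0, 0, 0, 0 } };
  for (int row = 0; row < 16; ++row) {         // 16 rows of 4x4 units in 64x64
    for (int col = 0; col < 16; col += 2) {    // a left edge every other column
      const int word = row >> 2;               // 4 rows of 16 bits per uint64_t
      const int bit = ((row & 3) << 4) | col;  // offset inside that word
      fm.bits[word] |= (uint64_t)1 << bit;
    }
  }
  // Every word now equals 0x5555555555555555ULL, matching the
  // "block size 64X64, TX_8X8" entry of left_mask_univariant_reordered.
  return fm;
}
#endif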
80 #if CONFIG_LPF_MASK
81 static const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = {
82   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18
83 };
84 
85 static const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = {
86   -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13
87 };
88 
89 static const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = {
90   -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8
91 };
92 
93 static const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = {
94   -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,
95   2,  3,  -1, -1, -1, -1, -1, -1, -1, -1, -1
96 };
97 static const int mask_id_table_vert_border[BLOCK_SIZES_ALL] = {
98   0,  47, 49, 19, 51, 53, 33, 55, 57, 42, 59,
99   60, 46, -1, -1, -1, 61, 62, 63, 64, 65, 66
100 };
101 
102 static const FilterMask left_mask_univariant_reordered[67] = {
103   // TX_4X4
104   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
105       0x0000000000000000ULL } },  // block size 4X4, TX_4X4
106   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
107       0x0000000000000000ULL } },  // block size 4X8, TX_4X4
108   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
109       0x0000000000000000ULL } },  // block size 8X4, TX_4X4
110   { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
111       0x0000000000000000ULL } },  // block size 8X8, TX_4X4
112   { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
113       0x0000000000000000ULL } },  // block size 8X16, TX_4X4
114   { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
115       0x0000000000000000ULL } },  // block size 16X8, TX_4X4
116   { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
117       0x0000000000000000ULL } },  // block size 16X16, TX_4X4
118   { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
119       0x0000000000000000ULL } },  // block size 16X32, TX_4X4
120   { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
121       0x0000000000000000ULL } },  // block size 32X16, TX_4X4
122   { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
123       0x0000000000000000ULL } },  // block size 32X32, TX_4X4
124   { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
125       0x00ff00ff00ff00ffULL } },  // block size 32X64, TX_4X4
126   { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
127       0x0000000000000000ULL } },  // block size 64X32, TX_4X4
128   { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
129       0xffffffffffffffffULL } },  // block size 64X64, TX_4X4
130   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
131       0x0000000000000000ULL } },  // block size 4X16, TX_4X4
132   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
133       0x0000000000000000ULL } },  // block size 16X4, TX_4X4
134   { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
135       0x0000000000000000ULL } },  // block size 8X32, TX_4X4
136   { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
137       0x0000000000000000ULL } },  // block size 32X8, TX_4X4
138   { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
139       0x000f000f000f000fULL } },  // block size 16X64, TX_4X4
140   { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
141       0x0000000000000000ULL } },  // block size 64X16, TX_4X4
142   // TX_8X8
143   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
144       0x0000000000000000ULL } },  // block size 8X8, TX_8X8
145   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
146       0x0000000000000000ULL } },  // block size 8X16, TX_8X8
147   { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
148       0x0000000000000000ULL } },  // block size 16X8, TX_8X8
149   { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
150       0x0000000000000000ULL } },  // block size 16X16, TX_8X8
151   { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL,
152       0x0000000000000000ULL } },  // block size 16X32, TX_8X8
153   { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
154       0x0000000000000000ULL } },  // block size 32X16, TX_8X8
155   { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0000000000000000ULL,
156       0x0000000000000000ULL } },  // block size 32X32, TX_8X8
157   { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL,
158       0x0055005500550055ULL } },  // block size 32X64, TX_8X8
159   { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL,
160       0x0000000000000000ULL } },  // block size 64X32, TX_8X8
161   { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL,
162       0x5555555555555555ULL } },  // block size 64X64, TX_8X8
163   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
164       0x0000000000000000ULL } },  // block size 8X32, TX_8X8
165   { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
166       0x0000000000000000ULL } },  // block size 32X8, TX_8X8
167   { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL,
168       0x0005000500050005ULL } },  // block size 16X64, TX_8X8
169   { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
170       0x0000000000000000ULL } },  // block size 64X16, TX_8X8
171   // TX_16X16
172   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
173       0x0000000000000000ULL } },  // block size 16X16, TX_16X16
174   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
175       0x0000000000000000ULL } },  // block size 16X32, TX_16X16
176   { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
177       0x0000000000000000ULL } },  // block size 32X16, TX_16X16
178   { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL,
179       0x0000000000000000ULL } },  // block size 32X32, TX_16X16
180   { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL,
181       0x0011001100110011ULL } },  // block size 32X64, TX_16X16
182   { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL,
183       0x0000000000000000ULL } },  // block size 64X32, TX_16X16
184   { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL,
185       0x1111111111111111ULL } },  // block size 64X64, TX_16X16
186   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
187       0x0001000100010001ULL } },  // block size 16X64, TX_16X16
188   { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
189       0x0000000000000000ULL } },  // block size 64X16, TX_16X16
190   // TX_32X32
191   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
192       0x0000000000000000ULL } },  // block size 32X32, TX_32X32
193   { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
194       0x0101010101010101ULL } },  // block size 32X64, TX_32X32
195   { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL,
196       0x0000000000000000ULL } },  // block size 64X32, TX_32X32
197   { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
198       0x0101010101010101ULL } },  // block size 64X64, TX_32X32
199   // TX_64X64
200   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
201       0x0001000100010001ULL } },  // block size 64X64, TX_64X64
202   // 2:1, 1:2 transform sizes.
203   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
204       0x0000000000000000ULL } },  // block size 4X8, TX_4X8
205   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
206       0x0000000000000000ULL } },  // block size 4X16, TX_4X8
207   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
208       0x0000000000000000ULL } },  // block size 8X4, TX_8X4
209   { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
210       0x0000000000000000ULL } },  // block size 16X4, TX_8X4
211   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
212       0x0000000000000000ULL } },  // block size 8X16, TX_8X16
213   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
214       0x0000000000000000ULL } },  // block size 8X32, TX_8X16
215   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
216       0x0000000000000000ULL } },  // block size 16X8, TX_16X8
217   { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
218       0x0000000000000000ULL } },  // block size 32X8, TX_16X8
219   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
220       0x0000000000000000ULL } },  // block size 16X32, TX_16X32
221   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
222       0x0001000100010001ULL } },  // block size 16X64, TX_16X32
223   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
224       0x0000000000000000ULL } },  // block size 32X16, TX_32X16
225   { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
226       0x0000000000000000ULL } },  // block size 64X16, TX_32X16
227   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
228       0x0001000100010001ULL } },  // block size 32X64, TX_32X64
229   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
230       0x0000000000000000ULL } },  // block size 64X32, TX_64X32
231   // 4:1, 1:4 transform sizes.
232   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
233       0x0000000000000000ULL } },  // block size 4X16, TX_4X16
234   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
235       0x0000000000000000ULL } },  // block size 16X4, TX_16X4
236   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
237       0x0000000000000000ULL } },  // block size 8X32, TX_8X32
238   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
239       0x0000000000000000ULL } },  // block size 32X8, TX_32X8
240   { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
241       0x0001000100010001ULL } },  // block size 16X64, TX_16X64
242   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
243       0x0000000000000000ULL } },  // block size 64X16, TX_64X16
244 };
245 
246 static const FilterMask above_mask_univariant_reordered[67] = {
247   // TX_4X4
248   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
249       0x0000000000000000ULL } },  // block size 4X4, TX_4X4
250   { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
251       0x0000000000000000ULL } },  // block size 4X8, TX_4X4
252   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
253       0x0000000000000000ULL } },  // block size 8X4, TX_4X4
254   { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
255       0x0000000000000000ULL } },  // block size 8X8, TX_4X4
256   { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
257       0x0000000000000000ULL } },  // block size 8X16, TX_4X4
258   { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
259       0x0000000000000000ULL } },  // block size 16X8, TX_4X4
260   { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
261       0x0000000000000000ULL } },  // block size 16X16, TX_4X4
262   { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
263       0x0000000000000000ULL } },  // block size 16X32, TX_4X4
264   { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
265       0x0000000000000000ULL } },  // block size 32X16, TX_4X4
266   { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
267       0x0000000000000000ULL } },  // block size 32X32, TX_4X4
268   { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
269       0x00ff00ff00ff00ffULL } },  // block size 32X64, TX_4X4
270   { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
271       0x0000000000000000ULL } },  // block size 64X32, TX_4X4
272   { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
273       0xffffffffffffffffULL } },  // block size 64X64, TX_4x4
274   { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
275       0x0000000000000000ULL } },  // block size 4X16, TX_4X4
276   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
277       0x0000000000000000ULL } },  // block size 16X4, TX_4X4
278   { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
279       0x0000000000000000ULL } },  // block size 8X32, TX_4X4
280   { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
281       0x0000000000000000ULL } },  // block size 32X8, TX_4X4
282   { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
283       0x000f000f000f000fULL } },  // block size 16X64, TX_4X4
284   { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
285       0x0000000000000000ULL } },  // block size 64X16, TX_4X4
286   // TX_8X8
287   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
288       0x0000000000000000ULL } },  // block size 8X8, TX_8X8
289   { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
290       0x0000000000000000ULL } },  // block size 8X16, TX_8X8
291   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
292       0x0000000000000000ULL } },  // block size 16X8, TX_8X8
293   { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
294       0x0000000000000000ULL } },  // block size 16X16, TX_8X8
295   { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL,
296       0x0000000000000000ULL } },  // block size 16X32, TX_8X8
297   { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
298       0x0000000000000000ULL } },  // block size 32X16, TX_8X8
299   { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL,
300       0x0000000000000000ULL } },  // block size 32X32, TX_8X8
301   { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL,
302       0x000000ff000000ffULL } },  // block size 32X64, TX_8X8
303   { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL,
304       0x0000000000000000ULL } },  // block size 64X32, TX_8X8
305   { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
306       0x0000ffff0000ffffULL } },  // block size 64X64, TX_8X8
307   { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL,
308       0x0000000000000000ULL } },  // block size 8X32, TX_8X8
309   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
310       0x0000000000000000ULL } },  // block size 32X8, TX_8X8
311   { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL,
312       0x0000000f0000000fULL } },  // block size 16X64, TX_8X8
313   { { 0x0000ffff0000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
314       0x0000000000000000ULL } },  // block size 64X16, TX_8X8
315   // TX_16X16
316   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
317       0x0000000000000000ULL } },  // block size 16X16, TX_16X16
318   { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL,
319       0x0000000000000000ULL } },  // block size 16X32, TX_16X16
320   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
321       0x0000000000000000ULL } },  // block size 32X16, TX_16X16
322   { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL,
323       0x0000000000000000ULL } },  // block size 32X32, TX_16X16
324   { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL,
325       0x00000000000000ffULL } },  // block size 32X64, TX_16X16
326   { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL,
327       0x0000000000000000ULL } },  // block size 64X32, TX_16X16
328   { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL,
329       0x000000000000ffffULL } },  // block size 64X64, TX_16X16
330   { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL,
331       0x000000000000000fULL } },  // block size 16X64, TX_16X16
332   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
333       0x0000000000000000ULL } },  // block size 64X16, TX_16X16
334   // TX_32X32
335   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
336       0x0000000000000000ULL } },  // block size 32X32, TX_32X32
337   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL,
338       0x0000000000000000ULL } },  // block size 32X64, TX_32X32
339   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
340       0x0000000000000000ULL } },  // block size 64X32, TX_32X32
341   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL,
342       0x0000000000000000ULL } },  // block size 64X64, TX_32X32
343   // TX_64X64
344   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
345       0x0000000000000000ULL } },  // block size 64X64, TX_64X64
346   // 2:1, 1:2 transform sizes.
347   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
348       0x0000000000000000ULL } },  // block size 4X8, TX_4X8
349   { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
350       0x0000000000000000ULL } },  // block size 4X16, TX_4X8
351   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
352       0x0000000000000000ULL } },  // block size 8X4, TX_8X4
353   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
354       0x0000000000000000ULL } },  // block size 16X4, TX_8X4
355   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
356       0x0000000000000000ULL } },  // block size 8X16, TX_8X16
357   { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL,
358       0x0000000000000000ULL } },  // block size 8X32, TX_8X16
359   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
360       0x0000000000000000ULL } },  // block size 16X8, TX_16X8
361   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
362       0x0000000000000000ULL } },  // block size 32X8, TX_16X8
363   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
364       0x0000000000000000ULL } },  // block size 16X32, TX_16X32
365   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL,
366       0x0000000000000000ULL } },  // block size 16X64, TX_16X32
367   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
368       0x0000000000000000ULL } },  // block size 32X16, TX_32X16
369   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
370       0x0000000000000000ULL } },  // block size 64X16, TX_32X16
371   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
372       0x0000000000000000ULL } },  // block size 32X64, TX_32X64
373   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
374       0x0000000000000000ULL } },  // block size 64X32, TX_64X32
375   // 4:1, 1:4 transform sizes.
376   { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
377       0x0000000000000000ULL } },  // block size 4X16, TX_4X16
378   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
379       0x0000000000000000ULL } },  // block size 16X4, TX_16X4
380   { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
381       0x0000000000000000ULL } },  // block size 8X32, TX_8X32
382   { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
383       0x0000000000000000ULL } },  // block size 32X8, TX_32X8
384   { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
385       0x0000000000000000ULL } },  // block size 16X64, TX_16X64
386   { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
387       0x0000000000000000ULL } },  // block size 64X16, TX_64X16
388 };
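
// Illustrative lookup sketch only (guarded by #if 0, never compiled). The
// mask_id_table_tx_* arrays above map a BLOCK_SIZE onto a row of the
// *_univariant_reordered tables for the square transform sizes; -1 marks
// block sizes that cannot carry that transform size. The exact call sites
// that consume these tables are outside this excerpt, so treat this helper
// and its name as assumptions made for documentation purposes.
#if 0
static const FilterMask *get_above_mask_example(BLOCK_SIZE bsize,
                                                TX_SIZE tx_size) {
  int id = -1;
  switch (tx_size) {
    case TX_4X4: id = mask_id_table_tx_4x4[bsize]; break;
    case TX_8X8: id = mask_id_table_tx_8x8[bsize]; break;
    case TX_16X16: id = mask_id_table_tx_16x16[bsize]; break;
    case TX_32X32: id = mask_id_table_tx_32x32[bsize]; break;
    default: break;
  }
  return (id >= 0) ? &above_mask_univariant_reordered[id] : NULL;
}
#endif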
389 
390 static LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm,
391                                             int mi_row, int mi_col) {
392   assert(cm->lf.lfm != NULL);
393   const int row = mi_row >> MIN_MIB_SIZE_LOG2;  // 64x64
394   const int col = mi_col >> MIN_MIB_SIZE_LOG2;
395   return &cm->lf.lfm[row * cm->lf.lfm_stride + col];
396 }
397 
398 typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
399                         const uint8_t *limit, const uint8_t *thresh);
400 
401 typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
402                             const uint8_t *limit0, const uint8_t *thresh0,
403                             const uint8_t *blimit1, const uint8_t *limit1,
404                             const uint8_t *thresh1);
405 
406 typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
407                            const uint8_t *limit, const uint8_t *thresh, int bd);
408 
409 typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
410                                const uint8_t *limit0, const uint8_t *thresh0,
411                                const uint8_t *blimit1, const uint8_t *limit1,
412                                const uint8_t *thresh1, int bd);
413 // A 64x64 block requires 256 bits to represent its 4x4 tx blocks, one bit
414 // per 4x4 unit. Every 4 rows of 4x4 units are packed into one uint64_t
415 // mask, so bitmask[4] (four uint64_t values) covers the whole 64x64 block.
416 //
417 // Given a location (mi_col, mi_row), this function returns the index
418 // 0, 1, 2, 3 that selects which bitmask[] to use, and the shift value.
419 //
420 // For example, mi_row is the row offset in mi (4-pixel) units, so
421 // (mi_row / 4) selects which uint64_t to use. Within that uint64_t,
422 // mi_row % 4 is the row offset, and each row holds 16 = 1 << stride_log2
423 // (stride_log2 = 4) 4x4 units.
424 // Therefore, shift = (row << stride_log2) + mi_col;
425 int get_index_shift(int mi_col, int mi_row, int *index) {
426   // *index = mi_row >> 2;
427   // rows = mi_row % 4;
428   // stride_log2 = 4;
429   // shift = (rows << stride_log2) + mi_col;
430   *index = mi_row >> 2;
431   return ((mi_row & 3) << 4) | mi_col;
432 }
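
// Usage sketch only (guarded by #if 0, never compiled): set the bit that
// corresponds to the 4x4 unit at (row, col) within a 64x64 area in a
// FilterMask, using get_index_shift() above. The helper name is an
// assumption for documentation; the callers below inline this pattern.
#if 0
static void set_mask_bit_example(FilterMask *fm, int row, int col) {
  int index = 0;
  const int shift = get_index_shift(col, row, &index);
  fm->bits[index] |= (uint64_t)1 << shift;
}
#endif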
433 
434 static void filter_selectively_vert_row2(
435     int subsampling_factor, uint8_t *s, int pitch, int plane,
436     uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
437     uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
438     const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
439   uint64_t mask;
440   const int step = 1 << subsampling_factor;
441 
442   for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
443               mask_8x8_1 | mask_4x4_1;
444        mask; mask >>= step) {
445     const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
446     const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
447 
448     if (mask & 1) {
449       if ((mask_16x16_0 | mask_16x16_1) & 1) {
450         // The chroma plane filters fewer pixels; this was introduced in
451         // the deblock_13tap experiment.
452         LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;
453 
454         if ((mask_16x16_0 & mask_16x16_1) & 1) {
455           if (plane) {
456             aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
457                                     lfi0->hev_thr, lfi1->mblim, lfi1->lim,
458                                     lfi1->hev_thr);
459           } else {
460             aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
461                                      lfi0->hev_thr, lfi1->mblim, lfi1->lim,
462                                      lfi1->hev_thr);
463           }
464         } else if (mask_16x16_0 & 1) {
465           lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
466         } else {
467           lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
468                        lfi1->hev_thr);
469         }
470       }
471 
472       if ((mask_8x8_0 | mask_8x8_1) & 1) {
473         // The chroma plane filters fewer pixels; this was introduced in
474         // the deblock_13tap experiment.
475         LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;
476 
477         if ((mask_8x8_0 & mask_8x8_1) & 1) {
478           if (plane) {
479             aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
480                                     lfi0->hev_thr, lfi1->mblim, lfi1->lim,
481                                     lfi1->hev_thr);
482           } else {
483             aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
484                                     lfi0->hev_thr, lfi1->mblim, lfi1->lim,
485                                     lfi1->hev_thr);
486           }
487         } else if (mask_8x8_0 & 1) {
488           lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
489         } else {
490           lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
491                        lfi1->hev_thr);
492         }
493       }
494 
495       if ((mask_4x4_0 | mask_4x4_1) & 1) {
496         if ((mask_4x4_0 & mask_4x4_1) & 1) {
497           aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
498                                   lfi0->hev_thr, lfi1->mblim, lfi1->lim,
499                                   lfi1->hev_thr);
500         } else if (mask_4x4_0 & 1) {
501           aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
502         } else {
503           aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
504                              lfi1->hev_thr);
505         }
506       }
507     }
508 
509     s += 4;
510     lfl += step;
511     lfl2 += step;
512     mask_16x16_0 >>= step;
513     mask_8x8_0 >>= step;
514     mask_4x4_0 >>= step;
515     mask_16x16_1 >>= step;
516     mask_8x8_1 >>= step;
517     mask_4x4_1 >>= step;
518   }
519 }
520 
521 #if CONFIG_AV1_HIGHBITDEPTH
522 static void highbd_filter_selectively_vert_row2(
523     int subsampling_factor, uint16_t *s, int pitch, int plane,
524     uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
525     uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
526     const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
527   uint64_t mask;
528   const int step = 1 << subsampling_factor;
529 
530   for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
531               mask_8x8_1 | mask_4x4_1;
532        mask; mask >>= step) {
533     const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
534     const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;
535 
536     if (mask & 1) {
537       if ((mask_16x16_0 | mask_16x16_1) & 1) {
538         // The chroma plane filters fewer pixels; this was introduced in
539         // the deblock_13tap experiment.
540         HbdLpfFunc highbd_lpf_vertical =
541             plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;
542 
543         if ((mask_16x16_0 & mask_16x16_1) & 1) {
544           if (plane) {
545             aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
546                                            lfi0->hev_thr, lfi1->mblim,
547                                            lfi1->lim, lfi1->hev_thr, bd);
548           } else {
549             aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
550                                             lfi0->hev_thr, lfi1->mblim,
551                                             lfi1->lim, lfi1->hev_thr, bd);
552           }
553         } else if (mask_16x16_0 & 1) {
554           highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
555                               bd);
556         } else {
557           highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
558                               lfi1->hev_thr, bd);
559         }
560       }
561 
562       if ((mask_8x8_0 | mask_8x8_1) & 1) {
563         HbdLpfFunc highbd_lpf_vertical =
564             plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;
565 
566         if ((mask_8x8_0 & mask_8x8_1) & 1) {
567           if (plane) {
568             aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
569                                            lfi0->hev_thr, lfi1->mblim,
570                                            lfi1->lim, lfi1->hev_thr, bd);
571           } else {
572             aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
573                                            lfi0->hev_thr, lfi1->mblim,
574                                            lfi1->lim, lfi1->hev_thr, bd);
575           }
576         } else if (mask_8x8_0 & 1) {
577           highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
578                               bd);
579         } else {
580           highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
581                               lfi1->hev_thr, bd);
582         }
583       }
584 
585       if ((mask_4x4_0 | mask_4x4_1) & 1) {
586         if ((mask_4x4_0 & mask_4x4_1) & 1) {
587           aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
588                                          lfi0->hev_thr, lfi1->mblim, lfi1->lim,
589                                          lfi1->hev_thr, bd);
590         } else if (mask_4x4_0 & 1) {
591           aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
592                                     lfi0->hev_thr, bd);
593         } else {
594           aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim,
595                                     lfi1->lim, lfi1->hev_thr, bd);
596         }
597       }
598     }
599 
600     s += 4;
601     lfl += step;
602     lfl2 += step;
603     mask_16x16_0 >>= step;
604     mask_8x8_0 >>= step;
605     mask_4x4_0 >>= step;
606     mask_16x16_1 >>= step;
607     mask_8x8_1 >>= step;
608     mask_4x4_1 >>= step;
609   }
610 }
611 #endif  // CONFIG_AV1_HIGHBITDEPTH
612 
613 static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
614                                      int subsampling, uint64_t mask_16x16,
615                                      uint64_t mask_8x8, uint64_t mask_4x4,
616                                      const loop_filter_info_n *lfi_n,
617                                      const uint8_t *lfl) {
618   uint64_t mask;
619   int count;
620   const int step = 1 << subsampling;
621   const unsigned int two_block_mask = subsampling ? 5 : 3;
622   int offset = 0;
623 
624   for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
625     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
626     // The next block's thresholds, when it is within the current 64x64 block.
627     // If it is out of bounds, its mask is zero and lfin points to the current
628     // edge's filter parameters instead of the next edge's.
629     int next_edge = step;
630     if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
631     const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);
632 
633     count = 1;
634     if (mask & 1) {
635       if (mask_16x16 & 1) {
636         // The chroma plane filters fewer pixels; this was introduced in
637         // the deblock_13tap experiment.
638         LpfFunc lpf_horizontal =
639             plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;
640 
641         if ((mask_16x16 & two_block_mask) == two_block_mask) {
642           if (plane) {
643             aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
644                                       lfi->hev_thr, lfin->mblim, lfin->lim,
645                                       lfin->hev_thr);
646           } else {
647             aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
648                                        lfi->hev_thr, lfin->mblim, lfin->lim,
649                                        lfin->hev_thr);
650           }
651           count = 2;
652         } else {
653           lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
654         }
655       } else if (mask_8x8 & 1) {
656         // The chroma plane filters fewer pixels; this was introduced in
657         // the deblock_13tap experiment.
658         LpfFunc lpf_horizontal =
659             plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;
660 
661         if ((mask_8x8 & two_block_mask) == two_block_mask) {
662           if (plane) {
663             aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
664                                       lfi->hev_thr, lfin->mblim, lfin->lim,
665                                       lfin->hev_thr);
666           } else {
667             aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
668                                       lfi->hev_thr, lfin->mblim, lfin->lim,
669                                       lfin->hev_thr);
670           }
671           count = 2;
672         } else {
673           lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
674         }
675       } else if (mask_4x4 & 1) {
676         if ((mask_4x4 & two_block_mask) == two_block_mask) {
677           aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
678                                     lfi->hev_thr, lfin->mblim, lfin->lim,
679                                     lfin->hev_thr);
680           count = 2;
681         } else {
682           aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
683         }
684       }
685     }
686 
687     s += 4 * count;
688     lfl += step * count;
689     mask_16x16 >>= step * count;
690     mask_8x8 >>= step * count;
691     mask_4x4 >>= step * count;
692     offset += step * count;
693   }
694 }
695 
696 #if CONFIG_AV1_HIGHBITDEPTH
697 static void highbd_filter_selectively_horiz(
698     uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
699     uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
700     uint8_t *lfl, int bd) {
701   uint64_t mask;
702   int count;
703   const int step = 1 << subsampling;
704   const unsigned int two_block_mask = subsampling ? 5 : 3;
705   int offset = 0;
706 
707   for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
708     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
709     // The next block's thresholds, when it is within the current 64x64 block.
710     // If it is out of bounds, its mask is zero and lfin points to the current
711     // edge's filter parameters instead of the next edge's.
712     int next_edge = step;
713     if (offset + next_edge >= MI_SIZE_64X64) next_edge = 0;
714     const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + next_edge);
715 
716     count = 1;
717     if (mask & 1) {
718       if (mask_16x16 & 1) {
719         HbdLpfFunc highbd_lpf_horizontal =
720             plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;
721 
722         if ((mask_16x16 & two_block_mask) == two_block_mask) {
723           if (plane) {
724             aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
725                                                lfi->hev_thr, lfin->mblim,
726                                                lfin->lim, lfin->hev_thr, bd);
727           } else {
728             aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
729                                               lfi->hev_thr, lfin->mblim,
730                                               lfin->lim, lfin->hev_thr, bd);
731           }
732           count = 2;
733         } else {
734           highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
735                                 bd);
736         }
737       } else if (mask_8x8 & 1) {
738         HbdLpfFunc highbd_lpf_horizontal =
739             plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;
740 
741         if ((mask_8x8 & two_block_mask) == two_block_mask) {
742           if (plane) {
743             aom_highbd_lpf_horizontal_6_dual_c(s, pitch, lfi->mblim, lfi->lim,
744                                                lfi->hev_thr, lfin->mblim,
745                                                lfin->lim, lfin->hev_thr, bd);
746           } else {
747             aom_highbd_lpf_horizontal_8_dual_c(s, pitch, lfi->mblim, lfi->lim,
748                                                lfi->hev_thr, lfin->mblim,
749                                                lfin->lim, lfin->hev_thr, bd);
750           }
751           count = 2;
752         } else {
753           highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
754                                 bd);
755         }
756       } else if (mask_4x4 & 1) {
757         if ((mask_4x4 & two_block_mask) == two_block_mask) {
758           aom_highbd_lpf_horizontal_4_dual_c(s, pitch, lfi->mblim, lfi->lim,
759                                              lfi->hev_thr, lfin->mblim,
760                                              lfin->lim, lfin->hev_thr, bd);
761           count = 2;
762         } else {
763           aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
764                                       lfi->hev_thr, bd);
765         }
766       }
767     }
768 
769     s += 4 * count;
770     lfl += step * count;
771     mask_16x16 >>= step * count;
772     mask_8x8 >>= step * count;
773     mask_4x4 >>= step * count;
774     offset += step * count;
775   }
776 }
777 #endif  // CONFIG_AV1_HIGHBITDEPTH
778 
779 void av1_build_bitmask_vert_info(
780     AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
781     int plane) {
782   const int subsampling_x = plane_ptr->subsampling_x;
783   const int subsampling_y = plane_ptr->subsampling_y;
784   const int is_uv = plane > 0;
785   TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
786   uint8_t level, prev_level = 1;
787   uint64_t skip, prev_skip = 0;
788   uint64_t is_coding_block_border;
789 
790   for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r++) {
791     const int mi_row = r << subsampling_y;
792     const int row = mi_row % MI_SIZE_64X64;
793     const int row_uv = row | subsampling_y;
794     int index = 0;
795     const int shift = get_index_shift(0, row, &index);
796 
797     for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width;
798          c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) {
799       const int mi_col = c << subsampling_x;
800       LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
801 
802       for (int col_in_unit = 0;
803            col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) {
804         const int x = (c + col_in_unit) << MI_SIZE_LOG2;
805         if (x >= plane_ptr->dst.width) break;
806         const int col = col_in_unit << subsampling_x;
807         const int col_uv = col | subsampling_x;
808         const uint64_t mask = ((uint64_t)1 << (shift | col));
809         skip = lfm->skip.bits[index] & mask;
810         is_coding_block_border = lfm->is_vert_border.bits[index] & mask;
811         switch (plane) {
812           case 0: level = lfm->lfl_y_ver[row_uv][col_uv]; break;
813           case 1: level = lfm->lfl_u_ver[row_uv][col_uv]; break;
814           case 2: level = lfm->lfl_v_ver[row_uv][col_uv]; break;
815           default: assert(plane >= 0 && plane <= 2); return;
816         }
817         for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
818           if (is_uv && ts == TX_64X64) continue;
819           if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) {
820             tx_size = ts;
821             break;
822           }
823         }
824         if ((c + col_in_unit > 0) && (level || prev_level) &&
825             (!prev_skip || !skip || is_coding_block_border)) {
826           const TX_SIZE min_tx_size =
827               AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
828           const int shift_1 = get_index_shift(col_uv, row_uv, &index);
829           const uint64_t mask_1 = ((uint64_t)1 << shift_1);
830           switch (plane) {
831             case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break;
832             case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break;
833             case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break;
834             default: assert(plane >= 0 && plane <= 2); return;
835           }
836           if (level == 0 && prev_level != 0) {
837             switch (plane) {
838               case 0: lfm->lfl_y_ver[row_uv][col_uv] = prev_level; break;
839               case 1: lfm->lfl_u_ver[row_uv][col_uv] = prev_level; break;
840               case 2: lfm->lfl_v_ver[row_uv][col_uv] = prev_level; break;
841               default: assert(plane >= 0 && plane <= 2); return;
842             }
843           }
844         }
845 
846         // update prev info
847         prev_level = level;
848         prev_skip = skip;
849         prev_tx_size = tx_size;
850         // advance
851         col_in_unit += tx_size_wide_unit[tx_size];
852       }
853     }
854   }
855 }
856 
857 void av1_build_bitmask_horz_info(
858     AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
859     int plane) {
860   const int subsampling_x = plane_ptr->subsampling_x;
861   const int subsampling_y = plane_ptr->subsampling_y;
862   const int is_uv = plane > 0;
863   TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
864   uint8_t level, prev_level = 1;
865   uint64_t skip, prev_skip = 0;
866   uint64_t is_coding_block_border;
867 
868   for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c++) {
869     const int mi_col = c << subsampling_x;
870     const int col = mi_col % MI_SIZE_64X64;
871     const int col_uv = col | subsampling_x;
872 
873     for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height;
874          r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) {
875       const int mi_row = r << subsampling_y;
876       LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
877 
878       for (int r_in_unit = 0;
879            r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) {
880         const int y = (r + r_in_unit) << MI_SIZE_LOG2;
881         if (y >= plane_ptr->dst.height) break;
882         const int row = r_in_unit << subsampling_y;
883         const int row_uv = row | subsampling_y;
884         int index = 0;
885         const int shift = get_index_shift(col, row, &index);
886         const uint64_t mask = ((uint64_t)1 << shift);
887         skip = lfm->skip.bits[index] & mask;
888         is_coding_block_border = lfm->is_horz_border.bits[index] & mask;
889         switch (plane) {
890           case 0: level = lfm->lfl_y_hor[row_uv][col_uv]; break;
891           case 1: level = lfm->lfl_u_hor[row_uv][col_uv]; break;
892           case 2: level = lfm->lfl_v_hor[row_uv][col_uv]; break;
893           default: assert(plane >= 0 && plane <= 2); return;
894         }
895         for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
896           if (is_uv && ts == TX_64X64) continue;
897           if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) {
898             tx_size = ts;
899             break;
900           }
901         }
902         if ((r + r_in_unit > 0) && (level || prev_level) &&
903             (!prev_skip || !skip || is_coding_block_border)) {
904           const TX_SIZE min_tx_size =
905               AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
906           const int shift_1 = get_index_shift(col_uv, row_uv, &index);
907           const uint64_t mask_1 = ((uint64_t)1 << shift_1);
908 
909           switch (plane) {
910             case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break;
911             case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break;
912             case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break;
913             default: assert(plane >= 0 && plane <= 2); return;
914           }
915           if (level == 0 && prev_level != 0) {
916             switch (plane) {
917               case 0: lfm->lfl_y_hor[row_uv][col_uv] = prev_level; break;
918               case 1: lfm->lfl_u_hor[row_uv][col_uv] = prev_level; break;
919               case 2: lfm->lfl_v_hor[row_uv][col_uv] = prev_level; break;
920               default: assert(plane >= 0 && plane <= 2); return;
921             }
922           }
923         }
924 
925         // update prev info
926         prev_level = level;
927         prev_skip = skip;
928         prev_tx_size = tx_size;
929         // advance
930         r_in_unit += tx_size_high_unit[tx_size];
931       }
932     }
933   }
934 }
935 
936 void av1_filter_block_plane_bitmask_vert(
937     AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
938     int mi_row, int mi_col) {
939   struct buf_2d *const dst = &plane_ptr->dst;
940   uint8_t *const buf0 = dst->buf;
941   const int ssx = plane_ptr->subsampling_x;
942   const int ssy = plane_ptr->subsampling_y;
943   const int mask_cutoff = 0xffff;
944   const int row_step = 1 << ssy;
945   const int two_row_step = 2 << ssy;
946   const int row_stride = dst->stride << MI_SIZE_LOG2;
947   const int two_row_stride = row_stride << 1;
948   uint64_t mask_16x16 = 0;
949   uint64_t mask_8x8 = 0;
950   uint64_t mask_4x4 = 0;
951   uint8_t *lfl;
952   uint8_t *lfl2;
953   LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
954   assert(lfm);
955 
956   // 1. vertical filtering. filter two rows at a time
957   for (int r = 0;
958        ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
959        r += two_row_step) {
960     const int row = r | ssy;
961     const int row_next = row + row_step;
962     const int col = ssx;
963     int index = 0;
964     const int shift = get_index_shift(col, row, &index);
965     int index_next = 0;
966     const int shift_next = get_index_shift(col, row_next, &index_next);
967     const int has_next_row = row_next < cm->mi_params.mi_rows;
968     switch (pl) {
969       case 0:
970         mask_16x16 = lfm->left_y[TX_16X16].bits[index];
971         mask_8x8 = lfm->left_y[TX_8X8].bits[index];
972         mask_4x4 = lfm->left_y[TX_4X4].bits[index];
973         lfl = &lfm->lfl_y_ver[row][col];
974         lfl2 = &lfm->lfl_y_ver[row_next][col];
975         break;
976       case 1:
977         mask_16x16 = lfm->left_u[TX_16X16].bits[index];
978         mask_8x8 = lfm->left_u[TX_8X8].bits[index];
979         mask_4x4 = lfm->left_u[TX_4X4].bits[index];
980         lfl = &lfm->lfl_u_ver[row][col];
981         lfl2 = &lfm->lfl_u_ver[row_next][col];
982         break;
983       case 2:
984         mask_16x16 = lfm->left_v[TX_16X16].bits[index];
985         mask_8x8 = lfm->left_v[TX_8X8].bits[index];
986         mask_4x4 = lfm->left_v[TX_4X4].bits[index];
987         lfl = &lfm->lfl_v_ver[row][col];
988         lfl2 = &lfm->lfl_v_ver[row_next][col];
989         break;
990       default: assert(pl >= 0 && pl <= 2); return;
991     }
992     uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
993     uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
994     uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
995     uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
996     uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
997     uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
998     if (!has_next_row) {
999       mask_16x16_1 = 0;
1000       mask_8x8_1 = 0;
1001       mask_4x4_1 = 0;
1002     }
1003 
1004 #if CONFIG_AV1_HIGHBITDEPTH
1005     if (cm->seq_params.use_highbitdepth)
1006       highbd_filter_selectively_vert_row2(
1007           ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
1008           mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
1009           &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
1010     else
1011       filter_selectively_vert_row2(
1012           ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
1013           mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
1014 #else
1015     filter_selectively_vert_row2(
1016         ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
1017         mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
1018 #endif
1019     dst->buf += two_row_stride;
1020   }
1021   // reset buf pointer for horizontal filtering
1022   dst->buf = buf0;
1023 }
1024 
1025 void av1_filter_block_plane_bitmask_horz(
1026     AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
1027     int mi_row, int mi_col) {
1028   struct buf_2d *const dst = &plane_ptr->dst;
1029   uint8_t *const buf0 = dst->buf;
1030   const int ssx = plane_ptr->subsampling_x;
1031   const int ssy = plane_ptr->subsampling_y;
1032   const int mask_cutoff = 0xffff;
1033   const int row_step = 1 << ssy;
1034   const int row_stride = dst->stride << MI_SIZE_LOG2;
1035   uint64_t mask_16x16 = 0;
1036   uint64_t mask_8x8 = 0;
1037   uint64_t mask_4x4 = 0;
1038   uint8_t *lfl;
1039   LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
1040   assert(lfm);
1041   for (int r = 0;
1042        ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
1043        r += row_step) {
1044     if (mi_row + r == 0) {
1045       dst->buf += row_stride;
1046       continue;
1047     }
1048     const int row = r | ssy;
1049     const int col = ssx;
1050     int index = 0;
1051     const int shift = get_index_shift(col, row, &index);
1052     switch (pl) {
1053       case 0:
1054         mask_16x16 = lfm->above_y[TX_16X16].bits[index];
1055         mask_8x8 = lfm->above_y[TX_8X8].bits[index];
1056         mask_4x4 = lfm->above_y[TX_4X4].bits[index];
1057         lfl = &lfm->lfl_y_hor[row][col];
1058         break;
1059       case 1:
1060         mask_16x16 = lfm->above_u[TX_16X16].bits[index];
1061         mask_8x8 = lfm->above_u[TX_8X8].bits[index];
1062         mask_4x4 = lfm->above_u[TX_4X4].bits[index];
1063         lfl = &lfm->lfl_u_hor[row][col];
1064         break;
1065       case 2:
1066         mask_16x16 = lfm->above_v[TX_16X16].bits[index];
1067         mask_8x8 = lfm->above_v[TX_8X8].bits[index];
1068         mask_4x4 = lfm->above_v[TX_4X4].bits[index];
1069         lfl = &lfm->lfl_v_hor[row][col];
1070         break;
1071       default: assert(pl >= 0 && pl <= 2); return;
1072     }
1073     mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
1074     mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
1075     mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
1076 
1077 #if CONFIG_AV1_HIGHBITDEPTH
1078     if (cm->seq_params.use_highbitdepth)
1079       highbd_filter_selectively_horiz(
1080           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
1081           mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->seq_params.bit_depth);
1082     else
1083       filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
1084                                mask_8x8, mask_4x4, &cm->lf_info, lfl);
1085 #else
1086     filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
1087                              mask_8x8, mask_4x4, &cm->lf_info, lfl);
1088 #endif
1089     dst->buf += row_stride;
1090   }
1091   // reset buf pointer for next block
1092   dst->buf = buf0;
1093 }
1094 
1095 void av1_filter_block_plane_ver(AV1_COMMON *const cm,
1096                                 struct macroblockd_plane *const plane_ptr,
1097                                 int pl, int mi_row, int mi_col) {
1098   struct buf_2d *const dst = &plane_ptr->dst;
1099   int r, c;
1100   const int ssx = plane_ptr->subsampling_x;
1101   const int ssy = plane_ptr->subsampling_y;
1102   const int mask_cutoff = 0xffff;
1103   const int single_step = 1 << ssy;
1104   const int r_step = 2 << ssy;
1105   uint64_t mask_16x16 = 0;
1106   uint64_t mask_8x8 = 0;
1107   uint64_t mask_4x4 = 0;
1108   uint8_t *lfl;
1109   uint8_t *lfl2;
1110 
1111   // filter two rows at a time
1112   for (r = 0; r < cm->seq_params.mib_size &&
1113               ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
1114        r += r_step) {
1115     for (c = 0; c < cm->seq_params.mib_size &&
1116                 ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
1117          c += MI_SIZE_64X64) {
1118       dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
1119       LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
1120       assert(lfm);
1121       const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
1122       const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
1123       int index = 0;
1124       const int shift = get_index_shift(col, row, &index);
1125       // current and next row should belong to the same mask_idx and index
1126       // next row's shift
1127       const int row_next = row + single_step;
1128       int index_next = 0;
1129       const int shift_next = get_index_shift(col, row_next, &index_next);
1130       switch (pl) {
1131         case 0:
1132           mask_16x16 = lfm->left_y[TX_16X16].bits[index];
1133           mask_8x8 = lfm->left_y[TX_8X8].bits[index];
1134           mask_4x4 = lfm->left_y[TX_4X4].bits[index];
1135           lfl = &lfm->lfl_y_ver[row][col];
1136           lfl2 = &lfm->lfl_y_ver[row_next][col];
1137           break;
1138         case 1:
1139           mask_16x16 = lfm->left_u[TX_16X16].bits[index];
1140           mask_8x8 = lfm->left_u[TX_8X8].bits[index];
1141           mask_4x4 = lfm->left_u[TX_4X4].bits[index];
1142           lfl = &lfm->lfl_u_ver[row][col];
1143           lfl2 = &lfm->lfl_u_ver[row_next][col];
1144           break;
1145         case 2:
1146           mask_16x16 = lfm->left_v[TX_16X16].bits[index];
1147           mask_8x8 = lfm->left_v[TX_8X8].bits[index];
1148           mask_4x4 = lfm->left_v[TX_4X4].bits[index];
1149           lfl = &lfm->lfl_v_ver[row][col];
1150           lfl2 = &lfm->lfl_v_ver[row_next][col];
1151           break;
1152         default: assert(pl >= 0 && pl <= 2); return;
1153       }
1154       uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
1155       uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
1156       uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
1157       uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
1158       uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
1159       uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
1160 
1161 #if CONFIG_AV1_HIGHBITDEPTH
1162       if (cm->seq_params.use_highbitdepth)
1163         highbd_filter_selectively_vert_row2(
1164             ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
1165             mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
1166             &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
1167       else
1168         filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
1169                                      mask_16x16_0, mask_8x8_0, mask_4x4_0,
1170                                      mask_16x16_1, mask_8x8_1, mask_4x4_1,
1171                                      &cm->lf_info, lfl, lfl2);
1172 #else
1173       filter_selectively_vert_row2(
1174           ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
1175           mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
1176 #endif
1177       dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
1178     }
1179     dst->buf += 2 * MI_SIZE * dst->stride;
1180   }
1181 }
1182 
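// Horizontal-edge counterpart of av1_filter_block_plane_ver(): walks the
// superblock one (subsampled) row of 4x4 units at a time, shifts the
// above-edge masks to that row, and filters with filter_selectively_horiz().
// Rows on the top frame border are skipped.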
void av1_filter_block_plane_hor(AV1_COMMON *const cm,
                                struct macroblockd_plane *const plane_ptr,
                                int pl, int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  int r, c;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  const int mask_cutoff = 0xffff;
  const int r_step = 1 << ssy;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;

  for (r = 0; r < cm->seq_params.mib_size &&
              ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
       r += r_step) {
    for (c = 0; c < cm->seq_params.mib_size &&
                ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
         c += MI_SIZE_64X64) {
      if (mi_row + r == 0) continue;

      dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
      assert(lfm);
      const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
      const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
      int index = 0;
      const int shift = get_index_shift(col, row, &index);
      switch (pl) {
        case 0:
          mask_16x16 = lfm->above_y[TX_16X16].bits[index];
          mask_8x8 = lfm->above_y[TX_8X8].bits[index];
          mask_4x4 = lfm->above_y[TX_4X4].bits[index];
          lfl = &lfm->lfl_y_hor[row][col];
          break;
        case 1:
          mask_16x16 = lfm->above_u[TX_16X16].bits[index];
          mask_8x8 = lfm->above_u[TX_8X8].bits[index];
          mask_4x4 = lfm->above_u[TX_4X4].bits[index];
          lfl = &lfm->lfl_u_hor[row][col];
          break;
        case 2:
          mask_16x16 = lfm->above_v[TX_16X16].bits[index];
          mask_8x8 = lfm->above_v[TX_8X8].bits[index];
          mask_4x4 = lfm->above_v[TX_4X4].bits[index];
          lfl = &lfm->lfl_v_hor[row][col];
          break;
        default: assert(pl >= 0 && pl <= 2); return;
      }
      mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
      mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
      mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;

#if CONFIG_AV1_HIGHBITDEPTH
      if (cm->seq_params.use_highbitdepth)
        highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                        dst->stride, pl, ssx, mask_16x16,
                                        mask_8x8, mask_4x4, &cm->lf_info, lfl,
                                        (int)cm->seq_params.bit_depth);
      else
        filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                                 mask_8x8, mask_4x4, &cm->lf_info, lfl);
#else
      filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                               mask_8x8, mask_4x4, &cm->lf_info, lfl);
#endif
      dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
    }
    dst->buf += MI_SIZE * dst->stride;
  }
}

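// Stores the transform-size bitmasks for a block that uses variable transform
// sizes: the (bsize, tx_size) pair selects one of the precomputed univariant
// masks, which is OR-ed into the 64x64 LoopFilterMask at this block's
// position for both the luma and chroma planes.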
void av1_store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             MB_MODE_INFO *mbmi) {
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const TX_SIZE tx_size_y_vert = txsize_vert_map[tx_size];
  const TX_SIZE tx_size_y_horz = txsize_horz_map[tx_size];
  const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const int is_square_transform_size = tx_size <= TX_64X64;
  int mask_id = 0;
  int offset = 0;
  const int half_ratio_tx_size_max32 =
      (tx_size > TX_64X64) & (tx_size <= TX_32X16);
  if (is_square_transform_size) {
    switch (tx_size) {
      case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break;
      case TX_8X8:
        mask_id = mask_id_table_tx_8x8[bsize];
        offset = 19;
        break;
      case TX_16X16:
        mask_id = mask_id_table_tx_16x16[bsize];
        offset = 33;
        break;
      case TX_32X32:
        mask_id = mask_id_table_tx_32x32[bsize];
        offset = 42;
        break;
      case TX_64X64: mask_id = 46; break;
      default: assert(!is_square_transform_size); return;
    }
    mask_id += offset;
  } else if (half_ratio_tx_size_max32) {
    int tx_size_equal_block_size = bsize == txsize_to_bsize[tx_size];
    mask_id = 47 + 2 * (tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1);
  } else if (tx_size == TX_32X64) {
    mask_id = 59;
  } else if (tx_size == TX_64X32) {
    mask_id = 60;
  } else {  // quarter ratio tx size
    mask_id = 61 + (tx_size - TX_4X16);
  }
  int index = 0;
  const int row = mi_row % MI_SIZE_64X64;
  const int col = mi_col % MI_SIZE_64X64;
  const int shift = get_index_shift(col, row, &index);
  const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col;
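  // Each 256-bit mask is stored as four uint64_t words. OR the selected
  // univariant mask into the words starting at 'index', shifted to the
  // block's position inside the 64x64 unit.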
  for (int i = 0; i + index < 4; ++i) {
    // y vertical.
    lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // y horizontal.
    lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
    // u/v vertical.
    lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // u/v horizontal.
    lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
  }
}

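// Same as av1_store_bitmask_vartx(), but for blocks coded with a single
// transform size, which is taken from mbmi->tx_size instead of being passed
// in explicitly.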
void av1_store_bitmask_univariant_tx(AV1_COMMON *cm, int mi_row, int mi_col,
                                     BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) {
  // Use a lookup table that provides one bitmask for a given block size and
  // a univariant transform size.
  int index;
  int shift;
  int row;
  int col;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const TX_SIZE tx_size_y_vert = txsize_vert_map[mbmi->tx_size];
  const TX_SIZE tx_size_y_horz = txsize_horz_map[mbmi->tx_size];
  const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
      mbmi->sb_type, cm->seq_params.subsampling_x,
      cm->seq_params.subsampling_y)];
  const int is_square_transform_size = mbmi->tx_size <= TX_64X64;
  int mask_id = 0;
  int offset = 0;
  const int half_ratio_tx_size_max32 =
      (mbmi->tx_size > TX_64X64) & (mbmi->tx_size <= TX_32X16);
  if (is_square_transform_size) {
    switch (mbmi->tx_size) {
      case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break;
      case TX_8X8:
        mask_id = mask_id_table_tx_8x8[bsize];
        offset = 19;
        break;
      case TX_16X16:
        mask_id = mask_id_table_tx_16x16[bsize];
        offset = 33;
        break;
      case TX_32X32:
        mask_id = mask_id_table_tx_32x32[bsize];
        offset = 42;
        break;
      case TX_64X64: mask_id = 46; break;
      default: assert(!is_square_transform_size); return;
    }
    mask_id += offset;
  } else if (half_ratio_tx_size_max32) {
    int tx_size_equal_block_size = bsize == txsize_to_bsize[mbmi->tx_size];
    mask_id =
        47 + 2 * (mbmi->tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1);
  } else if (mbmi->tx_size == TX_32X64) {
    mask_id = 59;
  } else if (mbmi->tx_size == TX_64X32) {
    mask_id = 60;
  } else {  // quarter ratio tx size
    mask_id = 61 + (mbmi->tx_size - TX_4X16);
  }
  row = mi_row % MI_SIZE_64X64;
  col = mi_col % MI_SIZE_64X64;
  shift = get_index_shift(col, row, &index);
  const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col;
  for (int i = 0; i + index < 4; ++i) {
    // y vertical.
    lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // y horizontal.
    lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
    // u/v vertical.
    lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |=
        (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift);
    // u/v horizontal.
    lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |=
        (above_mask_univariant_reordered[mask_id].bits[i] << shift);
  }
}

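// Stores the remaining loop-filter state for a block: the horizontal and
// vertical coding-block border bits, the skip bit for skipped inter blocks,
// and the per-4x4 filter levels for the Y, U and V planes.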
void av1_store_bitmask_other_info(AV1_COMMON *cm, int mi_row, int mi_col,
                                  BLOCK_SIZE bsize, MB_MODE_INFO *mbmi,
                                  int is_horz_coding_block_border,
                                  int is_vert_coding_block_border) {
  int index;
  int shift;
  int row;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  const int row_start = mi_row % MI_SIZE_64X64;
  const int col_start = mi_col % MI_SIZE_64X64;
  shift = get_index_shift(col_start, row_start, &index);
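  // A horizontal coding-block border marks the block's top edge by setting
  // bits [shift, shift + mi_size_wide[bsize]) in the word selected by 'index';
  // the ternaries below avoid an undefined 64-bit shift when block_shift is 64.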
  if (is_horz_coding_block_border) {
    const int block_shift = shift + mi_size_wide[bsize];
    assert(block_shift <= 64);
    const uint64_t right_edge_shift =
        (block_shift == 64) ? 0xffffffffffffffff : ((uint64_t)1 << block_shift);
    const uint64_t left_edge_shift = (block_shift == 64)
                                         ? (((uint64_t)1 << shift) - 1)
                                         : ((uint64_t)1 << shift);
    assert(right_edge_shift > left_edge_shift);
    const uint64_t top_edge_mask = right_edge_shift - left_edge_shift;
    lfm->is_horz_border.bits[index] |= top_edge_mask;
  }
  if (is_vert_coding_block_border) {
    const int is_vert_border = mask_id_table_vert_border[bsize];
    const int vert_shift = block_size_high[bsize] <= 8 ? shift : col_start;
    for (int i = 0; i + index < 4; ++i) {
      lfm->is_vert_border.bits[i + index] |=
          (left_mask_univariant_reordered[is_vert_border].bits[i]
           << vert_shift);
    }
  }
  const int is_skip = mbmi->skip && is_inter_block(mbmi);
  if (is_skip) {
    const int is_skip_mask = mask_id_table_tx_4x4[bsize];
    for (int i = 0; i + index < 4; ++i) {
      lfm->skip.bits[i + index] |=
          (above_mask_univariant_reordered[is_skip_mask].bits[i] << shift);
    }
  }
  const uint8_t level_vert_y =
      av1_get_filter_level(cm, &cm->lf_info, 0, 0, mbmi);
  const uint8_t level_horz_y =
      av1_get_filter_level(cm, &cm->lf_info, 1, 0, mbmi);
  const uint8_t level_u = av1_get_filter_level(cm, &cm->lf_info, 0, 1, mbmi);
  const uint8_t level_v = av1_get_filter_level(cm, &cm->lf_info, 0, 2, mbmi);
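  // Replicate the filter levels into every 4x4 unit covered by the block so
  // the filtering loops can look them up directly by (row, col).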
  for (int r = mi_row; r < mi_row + mi_size_high[bsize]; r++) {
    index = 0;
    row = r % MI_SIZE_64X64;
    memset(&lfm->lfl_y_ver[row][col_start], level_vert_y,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_y_hor[row][col_start], level_horz_y,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_u_ver[row][col_start], level_u,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_u_hor[row][col_start], level_u,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_v_ver[row][col_start], level_v,
           sizeof(uint8_t) * mi_size_wide[bsize]);
    memset(&lfm->lfl_v_hor[row][col_start], level_v,
           sizeof(uint8_t) * mi_size_wide[bsize]);
  }
}
#endif  // CONFIG_LPF_MASK