1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/loop_filter.h"
16
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21
22 #include "src/dsp/dsp.h"
23 #include "src/utils/common.h"
24
25 namespace libgav1 {
26 namespace dsp {
27 namespace {
28
// 7.14.6.1.
// Static-only collection of the C loop filter entry points and the numeric
// limits they share, parameterized on the bitdepth and the pixel storage type.
// The constructor is deleted: the struct is never instantiated.
template <int bitdepth, typename Pixel>
struct LoopFilterFuncs_C {
  LoopFilterFuncs_C() = delete;

  // Largest value representable in |bitdepth| unsigned bits.
  static constexpr int kMaxPixel = (1 << bitdepth) - 1;
  // Range of a |bitdepth|-bit two's complement value.
  static constexpr int kMaxSignedPixel = kMaxPixel >> 1;
  static constexpr int kMinSignedPixel = -kMaxSignedPixel - 1;
  // Flatness threshold: 1 at 8bpp, doubled for every extra bit of depth.
  static constexpr int kFlatThresh = 1 << (bitdepth - 8);

  // All entry points share one signature. The thresholds are passed by value
  // and rescaled internally for |bitdepth|.
  static void Vertical4(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal4(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);

  static void Vertical6(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal6(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);

  static void Vertical8(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal8(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);

  static void Vertical14(void* dest, ptrdiff_t stride, int outer_thresh,
                         int inner_thresh, int hev_thresh);
  static void Horizontal14(void* dest, ptrdiff_t stride, int outer_thresh,
                           int inner_thresh, int hev_thresh);
};
56
AdjustThresholds(const int bitdepth,int * const outer_thresh,int * const inner_thresh,int * const hev_thresh)57 inline void AdjustThresholds(const int bitdepth, int* const outer_thresh,
58 int* const inner_thresh, int* const hev_thresh) {
59 assert(*outer_thresh >= 7 && *outer_thresh <= 3 * kMaxLoopFilterValue + 4);
60 assert(*inner_thresh >= 1 && *inner_thresh <= kMaxLoopFilterValue);
61 assert(*hev_thresh >= 0 && *hev_thresh <= 3);
62 *outer_thresh <<= bitdepth - 8;
63 *inner_thresh <<= bitdepth - 8;
64 *hev_thresh <<= bitdepth - 8;
65 }
66
67 //------------------------------------------------------------------------------
68 // 4-tap filters
69
// 7.14.6.2.
// Filter mask for the 4-tap filters. |p| points at q0 (the first pixel past
// the edge) and |step| is the distance between successive taps. Returns true
// when both sides are smooth (neighbouring taps differ by at most
// |inner_thresh|) and the combined difference across the edge does not exceed
// |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter4(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  const int edge_difference = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_difference <= outer_thresh;
}
80
// 7.14.6.2.
// High edge variance test: true when the two taps nearest the edge differ by
// more than |thresh| on either side. |p| points at q0 and |step| is the tap
// spacing.
template <typename Pixel>
inline bool Hev(const Pixel* p, ptrdiff_t step, int thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int p_side_delta = std::abs(p1 - p0);
  const int q_side_delta = std::abs(q1 - q0);
  return p_side_delta > thresh || q_side_delta > thresh;
}
87
88 // 7.14.6.3.
89 // 4 pixels in, 2 pixels out.
90 template <int bitdepth, typename Pixel>
Filter2_C(Pixel * p,ptrdiff_t step)91 inline void Filter2_C(Pixel* p, ptrdiff_t step) {
92 const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
93 const int min_signed_val =
94 LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
95 const int max_signed_val =
96 LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
97 // 8bpp: [-893,892], 10bpp: [-3581,3580], 12bpp [-14333,14332]
98 const int a = 3 * (q0 - p0) + Clip3(p1 - q1, min_signed_val, max_signed_val);
99 // 8bpp: [-16,15], 10bpp: [-64,63], 12bpp: [-256,255]
100 const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
101 const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
102 const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
103 p[-step] = Clip3(p0 + a2, 0, max_unsigned_val);
104 p[0] = Clip3(q0 - a1, 0, max_unsigned_val);
105 }
106
107 // 7.14.6.3.
108 // 4 pixels in, 4 pixels out.
109 template <int bitdepth, typename Pixel>
Filter4_C(Pixel * p,ptrdiff_t step)110 inline void Filter4_C(Pixel* p, ptrdiff_t step) {
111 const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
112 const int a = 3 * (q0 - p0);
113 const int min_signed_val =
114 LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
115 const int max_signed_val =
116 LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
117 const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
118 const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
119 const int a3 = (a1 + 1) >> 1;
120 const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
121 p[-2 * step] = Clip3(p1 + a3, 0, max_unsigned_val);
122 p[-1 * step] = Clip3(p0 + a2, 0, max_unsigned_val);
123 p[0 * step] = Clip3(q0 - a1, 0, max_unsigned_val);
124 p[1 * step] = Clip3(q1 - a3, 0, max_unsigned_val);
125 }
126
127 template <int bitdepth, typename Pixel>
Vertical4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)128 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical4(void* dest, ptrdiff_t stride,
129 int outer_thresh,
130 int inner_thresh,
131 int hev_thresh) {
132 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
133 auto* dst = static_cast<Pixel*>(dest);
134 stride /= sizeof(Pixel);
135 for (int i = 0; i < 4; ++i) {
136 if (NeedsFilter4(dst, 1, outer_thresh, inner_thresh)) {
137 if (Hev(dst, 1, hev_thresh)) {
138 Filter2_C<bitdepth>(dst, 1);
139 } else {
140 Filter4_C<bitdepth>(dst, 1);
141 }
142 }
143 dst += stride;
144 }
145 }
146
147 template <int bitdepth, typename Pixel>
Horizontal4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)148 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal4(void* dest,
149 ptrdiff_t stride,
150 int outer_thresh,
151 int inner_thresh,
152 int hev_thresh) {
153 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
154 auto* dst = static_cast<Pixel*>(dest);
155 stride /= sizeof(Pixel);
156 for (int i = 0; i < 4; ++i) {
157 if (NeedsFilter4(dst, stride, outer_thresh, inner_thresh)) {
158 if (Hev(dst, stride, hev_thresh)) {
159 Filter2_C<bitdepth>(dst, stride);
160 } else {
161 Filter4_C<bitdepth>(dst, stride);
162 }
163 }
164 ++dst;
165 }
166 }
167
168 //------------------------------------------------------------------------------
169 // 5-tap (chroma) filters
170
// 7.14.6.2.
// Filter mask for the 6-pixel (chroma) filters. |p| points at q0 and |step| is
// the tap spacing. Returns true when every pair of neighbouring taps on each
// side (p2..p0 and q0..q2) differs by at most |inner_thresh| and the combined
// difference across the edge does not exceed |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter6(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  if (std::abs(p2 - p1) > inner_thresh) return false;
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  if (std::abs(q2 - q1) > inner_thresh) return false;
  const int edge_difference = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_difference <= outer_thresh;
}
183
// 7.14.6.2.
// Flatness test over three taps per side: true when p1 and p2 are within
// |flat_thresh| of p0 and q1 and q2 are within |flat_thresh| of q0. |p| points
// at q0 and |step| is the tap spacing.
template <typename Pixel>
inline bool IsFlat3(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int distance = 1; distance <= 2; ++distance) {
    const int p_tap = p[-(distance + 1) * step];
    const int q_tap = p[distance * step];
    if (std::abs(p_tap - p0) > flat_thresh) return false;
    if (std::abs(q_tap - q0) > flat_thresh) return false;
  }
  return true;
}
192
193 template <typename Pixel>
ApplyFilter6(int filter_value)194 inline Pixel ApplyFilter6(int filter_value) {
195 return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
196 }
197
198 // 7.14.6.4.
199 // 6 pixels in, 4 pixels out.
200 template <typename Pixel>
Filter6_C(Pixel * p,ptrdiff_t step)201 inline void Filter6_C(Pixel* p, ptrdiff_t step) {
202 const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
203 const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
204 const int a1 = 2 * p1;
205 const int a0 = 2 * p0;
206 const int b0 = 2 * q0;
207 const int b1 = 2 * q1;
208 // The max is 8 * max_pixel + 4 for the rounder.
209 // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
210 p[-2 * step] = ApplyFilter6<Pixel>(3 * p2 + a1 + a0 + q0);
211 p[-1 * step] = ApplyFilter6<Pixel>(p2 + a1 + a0 + b0 + q1);
212 p[0 * step] = ApplyFilter6<Pixel>(p1 + a0 + b0 + b1 + q2);
213 p[1 * step] = ApplyFilter6<Pixel>(p0 + b0 + b1 + 3 * q2);
214 }
215
216 template <int bitdepth, typename Pixel>
Vertical6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)217 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical6(void* dest, ptrdiff_t stride,
218 int outer_thresh,
219 int inner_thresh,
220 int hev_thresh) {
221 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
222 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
223 auto* dst = static_cast<Pixel*>(dest);
224 stride /= sizeof(Pixel);
225 for (int i = 0; i < 4; ++i) {
226 if (NeedsFilter6(dst, 1, outer_thresh, inner_thresh)) {
227 if (IsFlat3(dst, 1, flat_thresh)) {
228 Filter6_C(dst, 1);
229 } else if (Hev(dst, 1, hev_thresh)) {
230 Filter2_C<bitdepth>(dst, 1);
231 } else {
232 Filter4_C<bitdepth>(dst, 1);
233 }
234 }
235 dst += stride;
236 }
237 }
238
239 template <int bitdepth, typename Pixel>
Horizontal6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)240 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal6(void* dest,
241 ptrdiff_t stride,
242 int outer_thresh,
243 int inner_thresh,
244 int hev_thresh) {
245 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
246 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
247 auto* dst = static_cast<Pixel*>(dest);
248 stride /= sizeof(Pixel);
249 for (int i = 0; i < 4; ++i) {
250 if (NeedsFilter6(dst, stride, outer_thresh, inner_thresh)) {
251 if (IsFlat3(dst, stride, flat_thresh)) {
252 Filter6_C(dst, stride);
253 } else if (Hev(dst, stride, hev_thresh)) {
254 Filter2_C<bitdepth>(dst, stride);
255 } else {
256 Filter4_C<bitdepth>(dst, stride);
257 }
258 }
259 ++dst;
260 }
261 }
262
263 //------------------------------------------------------------------------------
264 // 7-tap filters
265
// 7.14.6.2.
// Filter mask for the 8- and 14-pixel filters. |p| points at q0 and |step| is
// the tap spacing. Returns true when every pair of neighbouring taps on each
// side (p3..p0 and q0..q3) differs by at most |inner_thresh| and the combined
// difference across the edge does not exceed |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter8(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p3 = p[-4 * step];
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  const int q3 = p[3 * step];
  if (std::abs(p3 - p2) > inner_thresh) return false;
  if (std::abs(p2 - p1) > inner_thresh) return false;
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  if (std::abs(q2 - q1) > inner_thresh) return false;
  if (std::abs(q3 - q2) > inner_thresh) return false;
  const int edge_difference = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_difference <= outer_thresh;
}
281
// 7.14.6.2.
// Flatness test over four taps per side: true when p1..p3 are within
// |flat_thresh| of p0 and q1..q3 are within |flat_thresh| of q0. |p| points at
// q0 and |step| is the tap spacing.
template <typename Pixel>
inline bool IsFlat4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int distance = 1; distance <= 3; ++distance) {
    const int p_tap = p[-(distance + 1) * step];
    const int q_tap = p[distance * step];
    if (std::abs(p_tap - p0) > flat_thresh) return false;
    if (std::abs(q_tap - q0) > flat_thresh) return false;
  }
  return true;
}
292
293 template <typename Pixel>
ApplyFilter8(int filter_value)294 inline Pixel ApplyFilter8(int filter_value) {
295 return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
296 }
297
298 // 7.14.6.4.
299 // 8 pixels in, 6 pixels out.
300 template <typename Pixel>
Filter8_C(Pixel * p,ptrdiff_t step)301 inline void Filter8_C(Pixel* p, ptrdiff_t step) {
302 const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
303 p0 = p[-step];
304 const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
305 // The max is 8 * max_pixel + 4 for the rounder.
306 // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
307 p[-3 * step] = ApplyFilter8<Pixel>(3 * p3 + 2 * p2 + p1 + p0 + q0);
308 p[-2 * step] = ApplyFilter8<Pixel>(2 * p3 + p2 + 2 * p1 + p0 + q0 + q1);
309 p[-1 * step] = ApplyFilter8<Pixel>(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2);
310 p[0 * step] = ApplyFilter8<Pixel>(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3);
311 p[1 * step] = ApplyFilter8<Pixel>(p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3);
312 p[2 * step] = ApplyFilter8<Pixel>(p0 + q0 + q1 + 2 * q2 + 3 * q3);
313 }
314
315 template <int bitdepth, typename Pixel>
Vertical8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)316 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical8(void* dest, ptrdiff_t stride,
317 int outer_thresh,
318 int inner_thresh,
319 int hev_thresh) {
320 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
321 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
322 auto* dst = static_cast<Pixel*>(dest);
323 stride /= sizeof(Pixel);
324 for (int i = 0; i < 4; ++i) {
325 if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
326 if (IsFlat4(dst, 1, flat_thresh)) {
327 Filter8_C(dst, 1);
328 } else if (Hev(dst, 1, hev_thresh)) {
329 Filter2_C<bitdepth>(dst, 1);
330 } else {
331 Filter4_C<bitdepth>(dst, 1);
332 }
333 }
334 dst += stride;
335 }
336 }
337
338 template <int bitdepth, typename Pixel>
Horizontal8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)339 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal8(void* dest,
340 ptrdiff_t stride,
341 int outer_thresh,
342 int inner_thresh,
343 int hev_thresh) {
344 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
345 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
346 auto* dst = static_cast<Pixel*>(dest);
347 stride /= sizeof(Pixel);
348 for (int i = 0; i < 4; ++i) {
349 if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
350 if (IsFlat4(dst, stride, flat_thresh)) {
351 Filter8_C(dst, stride);
352 } else if (Hev(dst, stride, hev_thresh)) {
353 Filter2_C<bitdepth>(dst, stride);
354 } else {
355 Filter4_C<bitdepth>(dst, stride);
356 }
357 }
358 ++dst;
359 }
360 }
361
362 //------------------------------------------------------------------------------
363 // 13-tap filters
364
// 7.14.6.2.
// Flatness test over the outer taps used by the 14-pixel filter: true when
// p4..p6 are within |flat_thresh| of p0 and q4..q6 are within |flat_thresh| of
// q0. The taps in between (p1..p3, q1..q3) are not examined here. |p| points
// at q0 and |step| is the tap spacing.
template <typename Pixel>
inline bool IsFlatOuter4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int distance = 4; distance <= 6; ++distance) {
    const int p_tap = p[-(distance + 1) * step];
    const int q_tap = p[distance * step];
    if (std::abs(p_tap - p0) > flat_thresh) return false;
    if (std::abs(q_tap - q0) > flat_thresh) return false;
  }
  return true;
}
375
376 template <typename Pixel>
ApplyFilter14(int filter_value)377 inline Pixel ApplyFilter14(int filter_value) {
378 return static_cast<Pixel>(RightShiftWithRounding(filter_value, 4));
379 }
380
381 // 7.14.6.4.
382 // 14 pixels in, 12 pixels out.
383 template <typename Pixel>
Filter14_C(Pixel * p,ptrdiff_t step)384 inline void Filter14_C(Pixel* p, ptrdiff_t step) {
385 const int p6 = p[-7 * step], p5 = p[-6 * step], p4 = p[-5 * step],
386 p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
387 p0 = p[-step];
388 const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step],
389 q4 = p[4 * step], q5 = p[5 * step], q6 = p[6 * step];
390 // The max is 16 * max_pixel + 8 for the rounder.
391 // 8bpp: 4088 (12 bits), 10bpp: 16376 (14 bits), 12bpp: 65528 (16 bits)
392 p[-6 * step] =
393 ApplyFilter14<Pixel>(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0);
394 p[-5 * step] = ApplyFilter14<Pixel>(p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 +
395 p1 + p0 + q0 + q1);
396 p[-4 * step] = ApplyFilter14<Pixel>(p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 +
397 p1 + p0 + q0 + q1 + q2);
398 p[-3 * step] = ApplyFilter14<Pixel>(p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 +
399 p1 * 2 + p0 + q0 + q1 + q2 + q3);
400 p[-2 * step] = ApplyFilter14<Pixel>(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 +
401 p0 * 2 + q0 + q1 + q2 + q3 + q4);
402 p[-1 * step] = ApplyFilter14<Pixel>(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
403 q0 * 2 + q1 + q2 + q3 + q4 + q5);
404 p[0 * step] = ApplyFilter14<Pixel>(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
405 q1 * 2 + q2 + q3 + q4 + q5 + q6);
406 p[1 * step] = ApplyFilter14<Pixel>(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
407 q2 * 2 + q3 + q4 + q5 + q6 * 2);
408 p[2 * step] = ApplyFilter14<Pixel>(p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 +
409 q3 * 2 + q4 + q5 + q6 * 3);
410 p[3 * step] = ApplyFilter14<Pixel>(p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 +
411 q4 * 2 + q5 + q6 * 4);
412 p[4 * step] = ApplyFilter14<Pixel>(p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 +
413 q5 * 2 + q6 * 5);
414 p[5 * step] =
415 ApplyFilter14<Pixel>(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7);
416 }
417
418 template <int bitdepth, typename Pixel>
Vertical14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)419 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical14(void* dest,
420 ptrdiff_t stride,
421 int outer_thresh,
422 int inner_thresh,
423 int hev_thresh) {
424 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
425 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
426 auto* dst = static_cast<Pixel*>(dest);
427 stride /= sizeof(Pixel);
428 for (int i = 0; i < 4; ++i) {
429 if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
430 if (IsFlat4(dst, 1, flat_thresh)) {
431 if (IsFlatOuter4(dst, 1, flat_thresh)) {
432 Filter14_C(dst, 1);
433 } else {
434 Filter8_C(dst, 1);
435 }
436 } else if (Hev(dst, 1, hev_thresh)) {
437 Filter2_C<bitdepth>(dst, 1);
438 } else {
439 Filter4_C<bitdepth>(dst, 1);
440 }
441 }
442 dst += stride;
443 }
444 }
445
446 template <int bitdepth, typename Pixel>
Horizontal14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)447 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal14(void* dest,
448 ptrdiff_t stride,
449 int outer_thresh,
450 int inner_thresh,
451 int hev_thresh) {
452 const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
453 AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
454 auto* dst = static_cast<Pixel*>(dest);
455 stride /= sizeof(Pixel);
456 for (int i = 0; i < 4; ++i) {
457 if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
458 if (IsFlat4(dst, stride, flat_thresh)) {
459 if (IsFlatOuter4(dst, stride, flat_thresh)) {
460 Filter14_C(dst, stride);
461 } else {
462 Filter8_C(dst, stride);
463 }
464 } else if (Hev(dst, stride, hev_thresh)) {
465 Filter2_C<bitdepth>(dst, stride);
466 } else {
467 Filter4_C<bitdepth>(dst, stride);
468 }
469 }
470 ++dst;
471 }
472 }
473
474 using Defs8bpp = LoopFilterFuncs_C<8, uint8_t>;
475
Init8bpp()476 void Init8bpp() {
477 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
478 assert(dsp != nullptr);
479 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
480 dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
481 Defs8bpp::Horizontal4;
482 dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
483 Defs8bpp::Vertical4;
484
485 dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
486 Defs8bpp::Horizontal6;
487 dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
488 Defs8bpp::Vertical6;
489
490 dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
491 Defs8bpp::Horizontal8;
492 dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
493 Defs8bpp::Vertical8;
494
495 dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
496 Defs8bpp::Horizontal14;
497 dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
498 Defs8bpp::Vertical14;
499 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
500 static_cast<void>(dsp);
501 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeHorizontal
502 dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
503 Defs8bpp::Horizontal4;
504 #endif
505 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeVertical
506 dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
507 Defs8bpp::Vertical4;
508 #endif
509
510 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeHorizontal
511 dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
512 Defs8bpp::Horizontal6;
513 #endif
514 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeVertical
515 dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
516 Defs8bpp::Vertical6;
517 #endif
518
519 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeHorizontal
520 dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
521 Defs8bpp::Horizontal8;
522 #endif
523 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeVertical
524 dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
525 Defs8bpp::Vertical8;
526 #endif
527
528 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeHorizontal
529 dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
530 Defs8bpp::Horizontal14;
531 #endif
532 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeVertical
533 dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
534 Defs8bpp::Vertical14;
535 #endif
536 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
537 }
538
#if LIBGAV1_MAX_BITDEPTH >= 10
using Defs10bpp = LoopFilterFuncs_C<10, uint16_t>;

// Installs the C loop filters into the writable 10bpp dsp table. When
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero every entry is written;
// otherwise an entry is written only if its
// LIBGAV1_Dsp10bpp_LoopFilterSize*_LoopFilterType* macro is not defined.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);
  // Every assignment below can be preprocessed away; keep |dsp| referenced so
  // it is never reported as unused.
  static_cast<void>(dsp);

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeHorizontal)
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal4;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeVertical)
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical4;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeHorizontal)
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal6;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeVertical)
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical6;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeHorizontal)
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal8;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeVertical)
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical8;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeHorizontal)
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal14;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeVertical)
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical14;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
605
606 } // namespace
607
LoopFilterInit_C()608 void LoopFilterInit_C() {
609 Init8bpp();
610 #if LIBGAV1_MAX_BITDEPTH >= 10
611 Init10bpp();
612 #endif
613 // Local functions that may be unused depending on the optimizations
614 // available.
615 static_cast<void>(AdjustThresholds);
616 }
617
618 } // namespace dsp
619 } // namespace libgav1
620