• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/loop_filter.h"
16 
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 
22 #include "src/dsp/dsp.h"
23 #include "src/utils/common.h"
24 
25 namespace libgav1 {
26 namespace dsp {
27 namespace {
28 
29 // 7.14.6.1.
// Bundles the C loop filter entry points and the numeric limits they share
// for one (bitdepth, Pixel) combination. The struct is never instantiated;
// everything in it is static.
template <int bitdepth, typename Pixel>
struct LoopFilterFuncs_C {
  LoopFilterFuncs_C() = delete;

  // Largest unsigned sample value: 2^bitdepth - 1.
  static constexpr int kMaxPixel = (1 << bitdepth) - 1;
  // Signed range of the same width: [-2^(bitdepth-1), 2^(bitdepth-1) - 1].
  static constexpr int kMaxSignedPixel = kMaxPixel >> 1;
  static constexpr int kMinSignedPixel = -kMaxSignedPixel - 1;
  // Flatness threshold: 1 at 8 bits, doubled for every extra bit of depth.
  static constexpr int kFlatThresh = 1 << (bitdepth - 8);

  // Every entry point takes the destination pointer, a stride and the three
  // thresholds (outer, inner, high edge variance). The number in the name is
  // the widest filter the function may apply.
  static void Vertical4(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal4(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Vertical6(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal6(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Vertical8(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Horizontal8(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Vertical14(void* dest, ptrdiff_t stride, int outer_thresh,
                         int inner_thresh, int hev_thresh);
  static void Horizontal14(void* dest, ptrdiff_t stride, int outer_thresh,
                           int inner_thresh, int hev_thresh);
};
56 
AdjustThresholds(const int bitdepth,int * const outer_thresh,int * const inner_thresh,int * const hev_thresh)57 inline void AdjustThresholds(const int bitdepth, int* const outer_thresh,
58                              int* const inner_thresh, int* const hev_thresh) {
59   assert(*outer_thresh >= 7 && *outer_thresh <= 3 * kMaxLoopFilterValue + 4);
60   assert(*inner_thresh >= 1 && *inner_thresh <= kMaxLoopFilterValue);
61   assert(*hev_thresh >= 0 && *hev_thresh <= 3);
62   *outer_thresh <<= bitdepth - 8;
63   *inner_thresh <<= bitdepth - 8;
64   *hev_thresh <<= bitdepth - 8;
65 }
66 
67 //------------------------------------------------------------------------------
68 // 4-tap filters
69 
70 // 7.14.6.2.
// Filter mask for the 4-sample filters. |p| addresses q0; p0 and p1 sit at
// -|step| and -2 * |step|, q1 at +|step|. Returns true when both sides are
// smooth (neighbouring difference within |inner_thresh|) and the combined
// activity across the edge is within |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter4(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  const int edge_activity = 2 * std::abs(p0 - q0) + std::abs(p1 - q1) / 2;
  return edge_activity <= outer_thresh;
}
80 
81 // 7.14.6.2.
// High edge variance test: true when the neighbouring difference on either
// side of the edge (p1/p0 or q1/q0) is strictly greater than |thresh|.
// |p| addresses q0 and |step| is the distance between successive samples.
template <typename Pixel>
inline bool Hev(const Pixel* p, ptrdiff_t step, int thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int p_side_delta = std::abs(p1 - p0);
  const int q_side_delta = std::abs(q1 - q0);
  return p_side_delta > thresh || q_side_delta > thresh;
}
87 
88 // 7.14.6.3.
89 // 4 pixels in, 2 pixels out.
90 template <int bitdepth, typename Pixel>
Filter2_C(Pixel * p,ptrdiff_t step)91 inline void Filter2_C(Pixel* p, ptrdiff_t step) {
92   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
93   const int min_signed_val =
94       LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
95   const int max_signed_val =
96       LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
97   // 8bpp: [-893,892], 10bpp: [-3581,3580], 12bpp [-14333,14332]
98   const int a = 3 * (q0 - p0) + Clip3(p1 - q1, min_signed_val, max_signed_val);
99   // 8bpp: [-16,15], 10bpp: [-64,63], 12bpp: [-256,255]
100   const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
101   const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
102   const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
103   p[-step] = Clip3(p0 + a2, 0, max_unsigned_val);
104   p[0] = Clip3(q0 - a1, 0, max_unsigned_val);
105 }
106 
107 // 7.14.6.3.
108 // 4 pixels in, 4 pixels out.
109 template <int bitdepth, typename Pixel>
Filter4_C(Pixel * p,ptrdiff_t step)110 inline void Filter4_C(Pixel* p, ptrdiff_t step) {
111   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
112   const int a = 3 * (q0 - p0);
113   const int min_signed_val =
114       LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
115   const int max_signed_val =
116       LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
117   const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
118   const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
119   const int a3 = (a1 + 1) >> 1;
120   const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
121   p[-2 * step] = Clip3(p1 + a3, 0, max_unsigned_val);
122   p[-1 * step] = Clip3(p0 + a2, 0, max_unsigned_val);
123   p[0 * step] = Clip3(q0 - a1, 0, max_unsigned_val);
124   p[1 * step] = Clip3(q1 - a3, 0, max_unsigned_val);
125 }
126 
127 template <int bitdepth, typename Pixel>
Vertical4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)128 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical4(void* dest, ptrdiff_t stride,
129                                                    int outer_thresh,
130                                                    int inner_thresh,
131                                                    int hev_thresh) {
132   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
133   auto* dst = static_cast<Pixel*>(dest);
134   stride /= sizeof(Pixel);
135   for (int i = 0; i < 4; ++i) {
136     if (NeedsFilter4(dst, 1, outer_thresh, inner_thresh)) {
137       if (Hev(dst, 1, hev_thresh)) {
138         Filter2_C<bitdepth>(dst, 1);
139       } else {
140         Filter4_C<bitdepth>(dst, 1);
141       }
142     }
143     dst += stride;
144   }
145 }
146 
147 template <int bitdepth, typename Pixel>
Horizontal4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)148 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal4(void* dest,
149                                                      ptrdiff_t stride,
150                                                      int outer_thresh,
151                                                      int inner_thresh,
152                                                      int hev_thresh) {
153   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
154   auto* dst = static_cast<Pixel*>(dest);
155   stride /= sizeof(Pixel);
156   for (int i = 0; i < 4; ++i) {
157     if (NeedsFilter4(dst, stride, outer_thresh, inner_thresh)) {
158       if (Hev(dst, stride, hev_thresh)) {
159         Filter2_C<bitdepth>(dst, stride);
160       } else {
161         Filter4_C<bitdepth>(dst, stride);
162       }
163     }
164     ++dst;
165   }
166 }
167 
168 //------------------------------------------------------------------------------
169 // 5-tap (chroma) filters
170 
171 // 7.14.6.2.
// Filter mask for the 6-sample filters. |p| addresses q0; p0..p2 sit at
// negative multiples of |step| and q1, q2 at positive ones. Returns true
// when every neighbouring difference on each side is within |inner_thresh|
// and the combined activity across the edge is within |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter6(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  const bool p_side_smooth = std::abs(p2 - p1) <= inner_thresh &&
                             std::abs(p1 - p0) <= inner_thresh;
  const bool q_side_smooth = std::abs(q1 - q0) <= inner_thresh &&
                             std::abs(q2 - q1) <= inner_thresh;
  if (!p_side_smooth || !q_side_smooth) return false;
  const int edge_activity = 2 * std::abs(p0 - q0) + std::abs(p1 - q1) / 2;
  return edge_activity <= outer_thresh;
}
183 
184 // 7.14.6.2.
// Flatness test over three samples per side. |p| addresses q0. Returns true
// when p1 and p2 are within |flat_thresh| of p0 and q1 and q2 are within
// |flat_thresh| of q0.
template <typename Pixel>
inline bool IsFlat3(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int k = 1; k <= 2; ++k) {
    const int pk = p[-(k + 1) * step];
    const int qk = p[k * step];
    if (std::abs(pk - p0) > flat_thresh || std::abs(qk - q0) > flat_thresh) {
      return false;
    }
  }
  return true;
}
192 
193 template <typename Pixel>
ApplyFilter6(int filter_value)194 inline Pixel ApplyFilter6(int filter_value) {
195   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
196 }
197 
198 // 7.14.6.4.
199 // 6 pixels in, 4 pixels out.
200 template <typename Pixel>
Filter6_C(Pixel * p,ptrdiff_t step)201 inline void Filter6_C(Pixel* p, ptrdiff_t step) {
202   const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
203   const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
204   const int a1 = 2 * p1;
205   const int a0 = 2 * p0;
206   const int b0 = 2 * q0;
207   const int b1 = 2 * q1;
208   // The max is 8 * max_pixel + 4 for the rounder.
209   // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
210   p[-2 * step] = ApplyFilter6<Pixel>(3 * p2 + a1 + a0 + q0);
211   p[-1 * step] = ApplyFilter6<Pixel>(p2 + a1 + a0 + b0 + q1);
212   p[0 * step] = ApplyFilter6<Pixel>(p1 + a0 + b0 + b1 + q2);
213   p[1 * step] = ApplyFilter6<Pixel>(p0 + b0 + b1 + 3 * q2);
214 }
215 
216 template <int bitdepth, typename Pixel>
Vertical6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)217 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical6(void* dest, ptrdiff_t stride,
218                                                    int outer_thresh,
219                                                    int inner_thresh,
220                                                    int hev_thresh) {
221   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
222   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
223   auto* dst = static_cast<Pixel*>(dest);
224   stride /= sizeof(Pixel);
225   for (int i = 0; i < 4; ++i) {
226     if (NeedsFilter6(dst, 1, outer_thresh, inner_thresh)) {
227       if (IsFlat3(dst, 1, flat_thresh)) {
228         Filter6_C(dst, 1);
229       } else if (Hev(dst, 1, hev_thresh)) {
230         Filter2_C<bitdepth>(dst, 1);
231       } else {
232         Filter4_C<bitdepth>(dst, 1);
233       }
234     }
235     dst += stride;
236   }
237 }
238 
239 template <int bitdepth, typename Pixel>
Horizontal6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)240 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal6(void* dest,
241                                                      ptrdiff_t stride,
242                                                      int outer_thresh,
243                                                      int inner_thresh,
244                                                      int hev_thresh) {
245   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
246   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
247   auto* dst = static_cast<Pixel*>(dest);
248   stride /= sizeof(Pixel);
249   for (int i = 0; i < 4; ++i) {
250     if (NeedsFilter6(dst, stride, outer_thresh, inner_thresh)) {
251       if (IsFlat3(dst, stride, flat_thresh)) {
252         Filter6_C(dst, stride);
253       } else if (Hev(dst, stride, hev_thresh)) {
254         Filter2_C<bitdepth>(dst, stride);
255       } else {
256         Filter4_C<bitdepth>(dst, stride);
257       }
258     }
259     ++dst;
260   }
261 }
262 
263 //------------------------------------------------------------------------------
264 // 7-tap filters
265 
266 // 7.14.6.2.
// Filter mask for the 8-sample (and wider) filters. |p| addresses q0; p0..p3
// sit at negative multiples of |step| and q1..q3 at positive ones. Returns
// true when every neighbouring difference on each side is within
// |inner_thresh| and the combined activity across the edge is within
// |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter8(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  // Offsets -4..-1 are p3..p0 and 0..3 are q0..q3. Walk the neighbouring
  // pairs on each side; the p0/q0 pair straddles the edge and is judged
  // against |outer_thresh| below instead.
  for (int i = -4; i < 3; ++i) {
    if (i == -1) continue;
    const int first = p[i * step];
    const int second = p[(i + 1) * step];
    if (std::abs(first - second) > inner_thresh) return false;
  }
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  return 2 * std::abs(p0 - q0) + std::abs(p1 - q1) / 2 <= outer_thresh;
}
281 
282 // 7.14.6.2.
// Flatness test over four samples per side. |p| addresses q0. Returns true
// when p1..p3 are within |flat_thresh| of p0 and q1..q3 are within
// |flat_thresh| of q0.
template <typename Pixel>
inline bool IsFlat4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int k = 1; k <= 3; ++k) {
    const int pk = p[-(k + 1) * step];
    const int qk = p[k * step];
    if (std::abs(pk - p0) > flat_thresh || std::abs(qk - q0) > flat_thresh) {
      return false;
    }
  }
  return true;
}
292 
293 template <typename Pixel>
ApplyFilter8(int filter_value)294 inline Pixel ApplyFilter8(int filter_value) {
295   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
296 }
297 
298 // 7.14.6.4.
299 // 8 pixels in, 6 pixels out.
300 template <typename Pixel>
Filter8_C(Pixel * p,ptrdiff_t step)301 inline void Filter8_C(Pixel* p, ptrdiff_t step) {
302   const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
303             p0 = p[-step];
304   const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
305   // The max is 8 * max_pixel + 4 for the rounder.
306   // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
307   p[-3 * step] = ApplyFilter8<Pixel>(3 * p3 + 2 * p2 + p1 + p0 + q0);
308   p[-2 * step] = ApplyFilter8<Pixel>(2 * p3 + p2 + 2 * p1 + p0 + q0 + q1);
309   p[-1 * step] = ApplyFilter8<Pixel>(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2);
310   p[0 * step] = ApplyFilter8<Pixel>(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3);
311   p[1 * step] = ApplyFilter8<Pixel>(p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3);
312   p[2 * step] = ApplyFilter8<Pixel>(p0 + q0 + q1 + 2 * q2 + 3 * q3);
313 }
314 
315 template <int bitdepth, typename Pixel>
Vertical8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)316 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical8(void* dest, ptrdiff_t stride,
317                                                    int outer_thresh,
318                                                    int inner_thresh,
319                                                    int hev_thresh) {
320   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
321   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
322   auto* dst = static_cast<Pixel*>(dest);
323   stride /= sizeof(Pixel);
324   for (int i = 0; i < 4; ++i) {
325     if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
326       if (IsFlat4(dst, 1, flat_thresh)) {
327         Filter8_C(dst, 1);
328       } else if (Hev(dst, 1, hev_thresh)) {
329         Filter2_C<bitdepth>(dst, 1);
330       } else {
331         Filter4_C<bitdepth>(dst, 1);
332       }
333     }
334     dst += stride;
335   }
336 }
337 
338 template <int bitdepth, typename Pixel>
Horizontal8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)339 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal8(void* dest,
340                                                      ptrdiff_t stride,
341                                                      int outer_thresh,
342                                                      int inner_thresh,
343                                                      int hev_thresh) {
344   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
345   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
346   auto* dst = static_cast<Pixel*>(dest);
347   stride /= sizeof(Pixel);
348   for (int i = 0; i < 4; ++i) {
349     if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
350       if (IsFlat4(dst, stride, flat_thresh)) {
351         Filter8_C(dst, stride);
352       } else if (Hev(dst, stride, hev_thresh)) {
353         Filter2_C<bitdepth>(dst, stride);
354       } else {
355         Filter4_C<bitdepth>(dst, stride);
356       }
357     }
358     ++dst;
359   }
360 }
361 
362 //------------------------------------------------------------------------------
363 // 13-tap filters
364 
365 // 7.14.6.2.
// Flatness test over the outer samples used only by the 14-sample filter.
// |p| addresses q0. Returns true when p4..p6 are within |flat_thresh| of p0
// and q4..q6 are within |flat_thresh| of q0. The samples in between (p1..p3,
// q1..q3) are not examined here.
template <typename Pixel>
inline bool IsFlatOuter4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p0 = p[-step];
  const int q0 = p[0];
  for (int k = 4; k <= 6; ++k) {
    const int pk = p[-(k + 1) * step];
    const int qk = p[k * step];
    if (std::abs(pk - p0) > flat_thresh || std::abs(qk - q0) > flat_thresh) {
      return false;
    }
  }
  return true;
}
375 
376 template <typename Pixel>
ApplyFilter14(int filter_value)377 inline Pixel ApplyFilter14(int filter_value) {
378   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 4));
379 }
380 
381 // 7.14.6.4.
382 // 14 pixels in, 12 pixels out.
383 template <typename Pixel>
Filter14_C(Pixel * p,ptrdiff_t step)384 inline void Filter14_C(Pixel* p, ptrdiff_t step) {
385   const int p6 = p[-7 * step], p5 = p[-6 * step], p4 = p[-5 * step],
386             p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
387             p0 = p[-step];
388   const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step],
389             q4 = p[4 * step], q5 = p[5 * step], q6 = p[6 * step];
390   // The max is 16 * max_pixel + 8 for the rounder.
391   // 8bpp: 4088 (12 bits), 10bpp: 16376 (14 bits), 12bpp: 65528 (16 bits)
392   p[-6 * step] =
393       ApplyFilter14<Pixel>(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0);
394   p[-5 * step] = ApplyFilter14<Pixel>(p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 +
395                                       p1 + p0 + q0 + q1);
396   p[-4 * step] = ApplyFilter14<Pixel>(p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 +
397                                       p1 + p0 + q0 + q1 + q2);
398   p[-3 * step] = ApplyFilter14<Pixel>(p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 +
399                                       p1 * 2 + p0 + q0 + q1 + q2 + q3);
400   p[-2 * step] = ApplyFilter14<Pixel>(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 +
401                                       p0 * 2 + q0 + q1 + q2 + q3 + q4);
402   p[-1 * step] = ApplyFilter14<Pixel>(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
403                                       q0 * 2 + q1 + q2 + q3 + q4 + q5);
404   p[0 * step] = ApplyFilter14<Pixel>(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
405                                      q1 * 2 + q2 + q3 + q4 + q5 + q6);
406   p[1 * step] = ApplyFilter14<Pixel>(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
407                                      q2 * 2 + q3 + q4 + q5 + q6 * 2);
408   p[2 * step] = ApplyFilter14<Pixel>(p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 +
409                                      q3 * 2 + q4 + q5 + q6 * 3);
410   p[3 * step] = ApplyFilter14<Pixel>(p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 +
411                                      q4 * 2 + q5 + q6 * 4);
412   p[4 * step] = ApplyFilter14<Pixel>(p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 +
413                                      q5 * 2 + q6 * 5);
414   p[5 * step] =
415       ApplyFilter14<Pixel>(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7);
416 }
417 
418 template <int bitdepth, typename Pixel>
Vertical14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)419 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical14(void* dest,
420                                                     ptrdiff_t stride,
421                                                     int outer_thresh,
422                                                     int inner_thresh,
423                                                     int hev_thresh) {
424   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
425   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
426   auto* dst = static_cast<Pixel*>(dest);
427   stride /= sizeof(Pixel);
428   for (int i = 0; i < 4; ++i) {
429     if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
430       if (IsFlat4(dst, 1, flat_thresh)) {
431         if (IsFlatOuter4(dst, 1, flat_thresh)) {
432           Filter14_C(dst, 1);
433         } else {
434           Filter8_C(dst, 1);
435         }
436       } else if (Hev(dst, 1, hev_thresh)) {
437         Filter2_C<bitdepth>(dst, 1);
438       } else {
439         Filter4_C<bitdepth>(dst, 1);
440       }
441     }
442     dst += stride;
443   }
444 }
445 
446 template <int bitdepth, typename Pixel>
Horizontal14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)447 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal14(void* dest,
448                                                       ptrdiff_t stride,
449                                                       int outer_thresh,
450                                                       int inner_thresh,
451                                                       int hev_thresh) {
452   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
453   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
454   auto* dst = static_cast<Pixel*>(dest);
455   stride /= sizeof(Pixel);
456   for (int i = 0; i < 4; ++i) {
457     if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
458       if (IsFlat4(dst, stride, flat_thresh)) {
459         if (IsFlatOuter4(dst, stride, flat_thresh)) {
460           Filter14_C(dst, stride);
461         } else {
462           Filter8_C(dst, stride);
463         }
464       } else if (Hev(dst, stride, hev_thresh)) {
465         Filter2_C<bitdepth>(dst, stride);
466       } else {
467         Filter4_C<bitdepth>(dst, stride);
468       }
469     }
470     ++dst;
471   }
472 }
473 
474 using Defs8bpp = LoopFilterFuncs_C<8, uint8_t>;
475 
Init8bpp()476 void Init8bpp() {
477   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
478   assert(dsp != nullptr);
479 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
480   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
481       Defs8bpp::Horizontal4;
482   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
483       Defs8bpp::Vertical4;
484 
485   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
486       Defs8bpp::Horizontal6;
487   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
488       Defs8bpp::Vertical6;
489 
490   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
491       Defs8bpp::Horizontal8;
492   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
493       Defs8bpp::Vertical8;
494 
495   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
496       Defs8bpp::Horizontal14;
497   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
498       Defs8bpp::Vertical14;
499 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
500   static_cast<void>(dsp);
501 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeHorizontal
502   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
503       Defs8bpp::Horizontal4;
504 #endif
505 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeVertical
506   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
507       Defs8bpp::Vertical4;
508 #endif
509 
510 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeHorizontal
511   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
512       Defs8bpp::Horizontal6;
513 #endif
514 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeVertical
515   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
516       Defs8bpp::Vertical6;
517 #endif
518 
519 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeHorizontal
520   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
521       Defs8bpp::Horizontal8;
522 #endif
523 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeVertical
524   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
525       Defs8bpp::Vertical8;
526 #endif
527 
528 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeHorizontal
529   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
530       Defs8bpp::Horizontal14;
531 #endif
532 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeVertical
533   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
534       Defs8bpp::Vertical14;
535 #endif
536 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
537 }
538 
#if LIBGAV1_MAX_BITDEPTH >= 10
using Defs10bpp = LoopFilterFuncs_C<10, uint16_t>;

// Installs the C loop filters into the writable 10-bit Dsp table.
//
// With LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS every slot is filled unconditionally.
// Otherwise a slot is filled only when the matching
// LIBGAV1_Dsp10bpp_LoopFilterSize*_LoopFilterType* macro is not defined
// (presumably the macro marks a slot another implementation provides;
// confirm against the headers that define it).
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // 4-sample filters.
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical4;
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal4;

  // 6-sample filters.
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical6;
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal6;

  // 8-sample filters.
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical8;
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal8;

  // 14-sample filters.
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical14;
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal14;
#else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // Every assignment below may be compiled out; keep |dsp| referenced.
  static_cast<void>(dsp);
  // 4-sample filters.
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeVertical
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical4;
#endif
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeHorizontal
  dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal4;
#endif

  // 6-sample filters.
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeVertical
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical6;
#endif
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeHorizontal
  dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal6;
#endif

  // 8-sample filters.
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeVertical
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical8;
#endif
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeHorizontal
  dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal8;
#endif

  // 14-sample filters.
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeVertical
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
      Defs10bpp::Vertical14;
#endif
#ifndef LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeHorizontal
  dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
      Defs10bpp::Horizontal14;
#endif
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
605 
606 }  // namespace
607 
LoopFilterInit_C()608 void LoopFilterInit_C() {
609   Init8bpp();
610 #if LIBGAV1_MAX_BITDEPTH >= 10
611   Init10bpp();
612 #endif
613   // Local functions that may be unused depending on the optimizations
614   // available.
615   static_cast<void>(AdjustThresholds);
616 }
617 
618 }  // namespace dsp
619 }  // namespace libgav1
620