• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/cdef.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdlib>
22 #include <cstring>
23 
24 #include "src/dsp/constants.h"
25 #include "src/dsp/dsp.h"
26 #include "src/utils/common.h"
27 #include "src/utils/constants.h"
28 
29 namespace libgav1 {
30 namespace dsp {
31 namespace {
32 
33 #include "src/dsp/cdef.inc"
34 
35 // Silence unused function warnings when CdefDirection_C is obviated.
36 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||           \
37     !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||    \
38     (LIBGAV1_MAX_BITDEPTH >= 10 &&                \
39      !defined(LIBGAV1_Dsp10bpp_CdefDirection)) || \
40     (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefDirection))
// Weights applied to the squared partial sums in CdefDirection_C. Entry |i|
// holds 840 / (i + 1); 840 is the least common multiple of 1..8, so every
// entry is an exact integer.
constexpr int16_t kDivisionTable[8] = {840 / 1, 840 / 2, 840 / 3, 840 / 4,
                                       840 / 5, 840 / 6, 840 / 7, 840 / 8};
42 
// Returns |x| multiplied by itself.
int32_t Square(int32_t x) {
  const int32_t squared = x * x;
  return squared;
}
44 
45 template <int bitdepth, typename Pixel>
CdefDirection_C(const void * LIBGAV1_RESTRICT const source,ptrdiff_t stride,uint8_t * LIBGAV1_RESTRICT const direction,int * LIBGAV1_RESTRICT const variance)46 void CdefDirection_C(const void* LIBGAV1_RESTRICT const source,
47                      ptrdiff_t stride,
48                      uint8_t* LIBGAV1_RESTRICT const direction,
49                      int* LIBGAV1_RESTRICT const variance) {
50   assert(direction != nullptr);
51   assert(variance != nullptr);
52   const auto* src = static_cast<const Pixel*>(source);
53   stride /= sizeof(Pixel);
54   int32_t cost[8] = {};
55   // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We
56   // use int32_t to keep it simple since |cost| will have to be int32_t.
57   int32_t partial[8][15] = {};
58   for (int i = 0; i < 8; ++i) {
59     for (int j = 0; j < 8; ++j) {
60       const int x = (src[j] >> (bitdepth - 8)) - 128;
61       partial[0][i + j] += x;
62       partial[1][i + j / 2] += x;
63       partial[2][i] += x;
64       partial[3][3 + i - j / 2] += x;
65       partial[4][7 + i - j] += x;
66       partial[5][3 - i / 2 + j] += x;
67       partial[6][j] += x;
68       partial[7][i / 2 + j] += x;
69     }
70     src += stride;
71   }
72   for (int i = 0; i < 8; ++i) {
73     cost[2] += Square(partial[2][i]);
74     cost[6] += Square(partial[6][i]);
75   }
76   cost[2] *= kDivisionTable[7];
77   cost[6] *= kDivisionTable[7];
78   for (int i = 0; i < 7; ++i) {
79     cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) *
80                kDivisionTable[i];
81     cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) *
82                kDivisionTable[i];
83   }
84   cost[0] += Square(partial[0][7]) * kDivisionTable[7];
85   cost[4] += Square(partial[4][7]) * kDivisionTable[7];
86   for (int i = 1; i < 8; i += 2) {
87     for (int j = 0; j < 5; ++j) {
88       cost[i] += Square(partial[i][3 + j]);
89     }
90     cost[i] *= kDivisionTable[7];
91     for (int j = 0; j < 3; ++j) {
92       cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) *
93                  kDivisionTable[2 * j + 1];
94     }
95   }
96   int32_t best_cost = 0;
97   *direction = 0;
98   for (int i = 0; i < 8; ++i) {
99     if (cost[i] > best_cost) {
100       best_cost = cost[i];
101       *direction = i;
102     }
103   }
104   *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10;
105 }
106 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
107         // !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||
108         // (LIBGAV1_MAX_BITDEPTH >= 10 &&
        //  !defined(LIBGAV1_Dsp10bpp_CdefDirection)) ||
110         // (LIBGAV1_MAX_BITDEPTH == 12 &&
111         //  !defined(LIBGAV1_Dsp12bpp_CdefDirection))
112 
113 // Silence unused function warnings when CdefFilter_C is obviated.
114 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                                       \
115     !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||                                  \
116     (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefFilters)) || \
117     (LIBGAV1_MAX_BITDEPTH == 12 && !defined(LIBGAV1_Dsp12bpp_CdefFilters))
118 
Constrain(int diff,int threshold,int damping)119 int Constrain(int diff, int threshold, int damping) {
120   assert(threshold != 0);
121   damping = std::max(0, damping - FloorLog2(threshold));
122   const int sign = (diff < 0) ? -1 : 1;
123   return sign *
124          Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff));
125 }
126 
127 // Filters the source block. It doesn't check whether the candidate pixel is
128 // inside the frame. However it requires the source input to be padded with a
129 // constant large value (kCdefLargeValue) if at the boundary.
130 template <int block_width, int bitdepth, typename Pixel,
131           bool enable_primary = true, bool enable_secondary = true>
CdefFilter_C(const uint16_t * LIBGAV1_RESTRICT src,const ptrdiff_t src_stride,const int block_height,const int primary_strength,const int secondary_strength,const int damping,const int direction,void * LIBGAV1_RESTRICT const dest,const ptrdiff_t dest_stride)132 void CdefFilter_C(const uint16_t* LIBGAV1_RESTRICT src,
133                   const ptrdiff_t src_stride, const int block_height,
134                   const int primary_strength, const int secondary_strength,
135                   const int damping, const int direction,
136                   void* LIBGAV1_RESTRICT const dest,
137                   const ptrdiff_t dest_stride) {
138   static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
139   static_assert(enable_primary || enable_secondary, "");
140   assert(block_height == 4 || block_height == 8);
141   assert(direction >= 0 && direction <= 7);
142   constexpr int coeff_shift = bitdepth - 8;
143   // Section 5.9.19. CDEF params syntax.
144   assert(primary_strength >= 0 && primary_strength <= 15 << coeff_shift);
145   assert(secondary_strength >= 0 && secondary_strength <= 4 << coeff_shift &&
146          secondary_strength != 3 << coeff_shift);
147   assert(primary_strength != 0 || secondary_strength != 0);
148   // damping is decreased by 1 for chroma.
149   assert((damping >= 3 && damping <= 6 + coeff_shift) ||
150          (damping >= 2 && damping <= 5 + coeff_shift));
151   // When only primary_strength or secondary_strength are non-zero the number
152   // of pixels inspected (4 for primary_strength, 8 for secondary_strength) and
153   // the taps used don't exceed the amount the sum is
154   // descaled by (16) so we can skip tracking and clipping to the minimum and
155   // maximum value observed.
156   constexpr bool clipping_required = enable_primary && enable_secondary;
157   static constexpr int kCdefSecondaryTaps[2] = {kCdefSecondaryTap0,
158                                                 kCdefSecondaryTap1};
159   auto* dst = static_cast<Pixel*>(dest);
160   const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel);
161   int y = block_height;
162   do {
163     int x = 0;
164     do {
165       int16_t sum = 0;
166       const uint16_t pixel_value = src[x];
167       uint16_t max_value = pixel_value;
168       uint16_t min_value = pixel_value;
169       for (int k = 0; k < 2; ++k) {
170         static constexpr int signs[] = {-1, 1};
171         for (const int& sign : signs) {
172           if (enable_primary) {
173             const int dy = sign * kCdefDirections[direction][k][0];
174             const int dx = sign * kCdefDirections[direction][k][1];
175             const uint16_t value = src[dy * src_stride + dx + x];
176             // Note: the summation can ignore the condition check in SIMD
177             // implementation, because Constrain() will return 0 when
178             // value == kCdefLargeValue.
179             if (value != kCdefLargeValue) {
180               sum += Constrain(value - pixel_value, primary_strength, damping) *
181                      kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][k];
182               if (clipping_required) {
183                 max_value = std::max(value, max_value);
184                 min_value = std::min(value, min_value);
185               }
186             }
187           }
188 
189           if (enable_secondary) {
190             static constexpr int offsets[] = {-2, 2};
191             for (const int& offset : offsets) {
192               const int dy = sign * kCdefDirections[direction + offset][k][0];
193               const int dx = sign * kCdefDirections[direction + offset][k][1];
194               const uint16_t value = src[dy * src_stride + dx + x];
195               // Note: the summation can ignore the condition check in SIMD
196               // implementation.
197               if (value != kCdefLargeValue) {
198                 sum += Constrain(value - pixel_value, secondary_strength,
199                                  damping) *
200                        kCdefSecondaryTaps[k];
201                 if (clipping_required) {
202                   max_value = std::max(value, max_value);
203                   min_value = std::min(value, min_value);
204                 }
205               }
206             }
207           }
208         }
209       }
210 
211       const int offset = (8 + sum - (sum < 0)) >> 4;
212       if (clipping_required) {
213         dst[x] = static_cast<Pixel>(
214             Clip3(pixel_value + offset, min_value, max_value));
215       } else {
216         dst[x] = static_cast<Pixel>(pixel_value + offset);
217       }
218     } while (++x < block_width);
219 
220     src += src_stride;
221     dst += dst_stride;
222   } while (--y != 0);
223 }
224 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
225         // !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||
226         // (LIBGAV1_MAX_BITDEPTH >= 10 &&
        //  !defined(LIBGAV1_Dsp10bpp_CdefFilters)) ||
228         // (LIBGAV1_MAX_BITDEPTH == 12 &&
229         //  !defined(LIBGAV1_Dsp12bpp_CdefFilters))
230 
Init8bpp()231 void Init8bpp() {
232   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
233   assert(dsp != nullptr);
234 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
235   dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
236   dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
237   dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
238                                          /*enable_secondary=*/false>;
239   dsp->cdef_filters[0][2] =
240       CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
241   dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
242   dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
243                                          /*enable_secondary=*/false>;
244   dsp->cdef_filters[1][2] =
245       CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
246 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
247   static_cast<void>(dsp);
248 #ifndef LIBGAV1_Dsp8bpp_CdefDirection
249   dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
250 #endif
251 #ifndef LIBGAV1_Dsp8bpp_CdefFilters
252   dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
253   dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
254                                          /*enable_secondary=*/false>;
255   dsp->cdef_filters[0][2] =
256       CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
257   dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
258   dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
259                                          /*enable_secondary=*/false>;
260   dsp->cdef_filters[1][2] =
261       CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
262 #endif
263 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
264 }
265 
266 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()267 void Init10bpp() {
268   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
269   assert(dsp != nullptr);
270 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
271   dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
272   dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
273   dsp->cdef_filters[0][1] =
274       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
275                    /*enable_secondary=*/false>;
276   dsp->cdef_filters[0][2] =
277       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
278   dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
279   dsp->cdef_filters[1][1] =
280       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
281                    /*enable_secondary=*/false>;
282   dsp->cdef_filters[1][2] =
283       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
284 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
285   static_cast<void>(dsp);
286 #ifndef LIBGAV1_Dsp10bpp_CdefDirection
287   dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
288 #endif
289 #ifndef LIBGAV1_Dsp10bpp_CdefFilters
290   dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
291   dsp->cdef_filters[0][1] =
292       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
293                    /*enable_secondary=*/false>;
294   dsp->cdef_filters[0][2] =
295       CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
296   dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
297   dsp->cdef_filters[1][1] =
298       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
299                    /*enable_secondary=*/false>;
300   dsp->cdef_filters[1][2] =
301       CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
302 #endif
303 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
304 }
305 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
306 
307 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()308 void Init12bpp() {
309   Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
310   assert(dsp != nullptr);
311 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
312   dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
313   dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
314   dsp->cdef_filters[0][1] =
315       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
316                    /*enable_secondary=*/false>;
317   dsp->cdef_filters[0][2] =
318       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
319   dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
320   dsp->cdef_filters[1][1] =
321       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
322                    /*enable_secondary=*/false>;
323   dsp->cdef_filters[1][2] =
324       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
325 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
326   static_cast<void>(dsp);
327 #ifndef LIBGAV1_Dsp12bpp_CdefDirection
328   dsp->cdef_direction = CdefDirection_C<12, uint16_t>;
329 #endif
330 #ifndef LIBGAV1_Dsp12bpp_CdefFilters
331   dsp->cdef_filters[0][0] = CdefFilter_C<4, 12, uint16_t>;
332   dsp->cdef_filters[0][1] =
333       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/true,
334                    /*enable_secondary=*/false>;
335   dsp->cdef_filters[0][2] =
336       CdefFilter_C<4, 12, uint16_t, /*enable_primary=*/false>;
337   dsp->cdef_filters[1][0] = CdefFilter_C<8, 12, uint16_t>;
338   dsp->cdef_filters[1][1] =
339       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/true,
340                    /*enable_secondary=*/false>;
341   dsp->cdef_filters[1][2] =
342       CdefFilter_C<8, 12, uint16_t, /*enable_primary=*/false>;
343 #endif
344 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
345 }
346 #endif  // LIBGAV1_MAX_BITDEPTH == 12
347 
348 }  // namespace
349 
CdefInit_C()350 void CdefInit_C() {
351   Init8bpp();
352 #if LIBGAV1_MAX_BITDEPTH >= 10
353   Init10bpp();
354 #endif
355 #if LIBGAV1_MAX_BITDEPTH == 12
356   Init12bpp();
357 #endif
358 }
359 
360 }  // namespace dsp
361 }  // namespace libgav1
362