1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/cdef.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
32 #include "src/dsp/cdef.inc"
33
34 // Silence unused function warnings when CdefDirection_C is obviated.
35 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
36 !defined(LIBGAV1_Dsp8bpp_CdefDirection) || \
37 (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefDirection))
// Weights applied to squared line sums in CdefDirection_C. Entry |i| equals
// 840 / (i + 1); 840 is the least common multiple of 1..8, so every quotient
// is exact.
constexpr int16_t kDivisionTable[] = {840 / 1, 840 / 2, 840 / 3, 840 / 4,
                                      840 / 5, 840 / 6, 840 / 7, 840 / 8};
39
// Returns |x| multiplied by itself. The product is not widened, so callers
// must pass values whose square fits in int32_t.
int32_t Square(const int32_t x) {
  const int32_t squared = x * x;
  return squared;
}
41
42 template <int bitdepth, typename Pixel>
CdefDirection_C(const void * const source,ptrdiff_t stride,uint8_t * const direction,int * const variance)43 void CdefDirection_C(const void* const source, ptrdiff_t stride,
44 uint8_t* const direction, int* const variance) {
45 assert(direction != nullptr);
46 assert(variance != nullptr);
47 const auto* src = static_cast<const Pixel*>(source);
48 stride /= sizeof(Pixel);
49 int32_t cost[8] = {};
50 // |partial| does not have to be int32_t for 8bpp. int16_t will suffice. We
51 // use int32_t to keep it simple since |cost| will have to be int32_t.
52 int32_t partial[8][15] = {};
53 for (int i = 0; i < 8; ++i) {
54 for (int j = 0; j < 8; ++j) {
55 const int x = (src[j] >> (bitdepth - 8)) - 128;
56 partial[0][i + j] += x;
57 partial[1][i + j / 2] += x;
58 partial[2][i] += x;
59 partial[3][3 + i - j / 2] += x;
60 partial[4][7 + i - j] += x;
61 partial[5][3 - i / 2 + j] += x;
62 partial[6][j] += x;
63 partial[7][i / 2 + j] += x;
64 }
65 src += stride;
66 }
67 for (int i = 0; i < 8; ++i) {
68 cost[2] += Square(partial[2][i]);
69 cost[6] += Square(partial[6][i]);
70 }
71 cost[2] *= kDivisionTable[7];
72 cost[6] *= kDivisionTable[7];
73 for (int i = 0; i < 7; ++i) {
74 cost[0] += (Square(partial[0][i]) + Square(partial[0][14 - i])) *
75 kDivisionTable[i];
76 cost[4] += (Square(partial[4][i]) + Square(partial[4][14 - i])) *
77 kDivisionTable[i];
78 }
79 cost[0] += Square(partial[0][7]) * kDivisionTable[7];
80 cost[4] += Square(partial[4][7]) * kDivisionTable[7];
81 for (int i = 1; i < 8; i += 2) {
82 for (int j = 0; j < 5; ++j) {
83 cost[i] += Square(partial[i][3 + j]);
84 }
85 cost[i] *= kDivisionTable[7];
86 for (int j = 0; j < 3; ++j) {
87 cost[i] += (Square(partial[i][j]) + Square(partial[i][10 - j])) *
88 kDivisionTable[2 * j + 1];
89 }
90 }
91 int32_t best_cost = 0;
92 *direction = 0;
93 for (int i = 0; i < 8; ++i) {
94 if (cost[i] > best_cost) {
95 best_cost = cost[i];
96 *direction = i;
97 }
98 }
99 *variance = (best_cost - cost[(*direction + 4) & 7]) >> 10;
100 }
101 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
102 // !defined(LIBGAV1_Dsp8bpp_CdefDirection) ||
103 // (LIBGAV1_MAX_BITDEPTH >= 10 &&
104 // !defined(LIBGAV1_Dsp10bpp_CdefDirection))
105
106 // Silence unused function warnings when CdefFilter_C is obviated.
107 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
108 !defined(LIBGAV1_Dsp8bpp_CdefFilters) || \
109 (LIBGAV1_MAX_BITDEPTH >= 10 && !defined(LIBGAV1_Dsp10bpp_CdefFilters))
110
Constrain(int diff,int threshold,int damping)111 int Constrain(int diff, int threshold, int damping) {
112 assert(threshold != 0);
113 damping = std::max(0, damping - FloorLog2(threshold));
114 const int sign = (diff < 0) ? -1 : 1;
115 return sign *
116 Clip3(threshold - (std::abs(diff) >> damping), 0, std::abs(diff));
117 }
118
119 // Filters the source block. It doesn't check whether the candidate pixel is
120 // inside the frame. However it requires the source input to be padded with a
121 // constant large value (kCdefLargeValue) if at the boundary.
122 template <int block_width, int bitdepth, typename Pixel,
123 bool enable_primary = true, bool enable_secondary = true>
CdefFilter_C(const uint16_t * src,const ptrdiff_t src_stride,const int block_height,const int primary_strength,const int secondary_strength,const int damping,const int direction,void * const dest,const ptrdiff_t dest_stride)124 void CdefFilter_C(const uint16_t* src, const ptrdiff_t src_stride,
125 const int block_height, const int primary_strength,
126 const int secondary_strength, const int damping,
127 const int direction, void* const dest,
128 const ptrdiff_t dest_stride) {
129 static_assert(block_width == 4 || block_width == 8, "Invalid CDEF width.");
130 static_assert(enable_primary || enable_secondary, "");
131 assert(block_height == 4 || block_height == 8);
132 assert(direction >= 0 && direction <= 7);
133 constexpr int coeff_shift = bitdepth - 8;
134 // Section 5.9.19. CDEF params syntax.
135 assert(primary_strength >= 0 && primary_strength <= 15 << coeff_shift);
136 assert(secondary_strength >= 0 && secondary_strength <= 4 << coeff_shift &&
137 secondary_strength != 3 << coeff_shift);
138 assert(primary_strength != 0 || secondary_strength != 0);
139 // damping is decreased by 1 for chroma.
140 assert((damping >= 3 && damping <= 6 + coeff_shift) ||
141 (damping >= 2 && damping <= 5 + coeff_shift));
142 // When only primary_strength or secondary_strength are non-zero the number
143 // of pixels inspected (4 for primary_strength, 8 for secondary_strength) and
144 // the taps used don't exceed the amount the sum is
145 // descaled by (16) so we can skip tracking and clipping to the minimum and
146 // maximum value observed.
147 constexpr bool clipping_required = enable_primary && enable_secondary;
148 static constexpr int kCdefSecondaryTaps[2] = {kCdefSecondaryTap0,
149 kCdefSecondaryTap1};
150 auto* dst = static_cast<Pixel*>(dest);
151 const ptrdiff_t dst_stride = dest_stride / sizeof(Pixel);
152 int y = block_height;
153 do {
154 int x = 0;
155 do {
156 int16_t sum = 0;
157 const uint16_t pixel_value = src[x];
158 uint16_t max_value = pixel_value;
159 uint16_t min_value = pixel_value;
160 for (int k = 0; k < 2; ++k) {
161 static constexpr int signs[] = {-1, 1};
162 for (const int& sign : signs) {
163 if (enable_primary) {
164 const int dy = sign * kCdefDirections[direction][k][0];
165 const int dx = sign * kCdefDirections[direction][k][1];
166 const uint16_t value = src[dy * src_stride + dx + x];
167 // Note: the summation can ignore the condition check in SIMD
168 // implementation, because Constrain() will return 0 when
169 // value == kCdefLargeValue.
170 if (value != kCdefLargeValue) {
171 sum += Constrain(value - pixel_value, primary_strength, damping) *
172 kCdefPrimaryTaps[(primary_strength >> coeff_shift) & 1][k];
173 if (clipping_required) {
174 max_value = std::max(value, max_value);
175 min_value = std::min(value, min_value);
176 }
177 }
178 }
179
180 if (enable_secondary) {
181 static constexpr int offsets[] = {-2, 2};
182 for (const int& offset : offsets) {
183 const int dy = sign * kCdefDirections[direction + offset][k][0];
184 const int dx = sign * kCdefDirections[direction + offset][k][1];
185 const uint16_t value = src[dy * src_stride + dx + x];
186 // Note: the summation can ignore the condition check in SIMD
187 // implementation.
188 if (value != kCdefLargeValue) {
189 sum += Constrain(value - pixel_value, secondary_strength,
190 damping) *
191 kCdefSecondaryTaps[k];
192 if (clipping_required) {
193 max_value = std::max(value, max_value);
194 min_value = std::min(value, min_value);
195 }
196 }
197 }
198 }
199 }
200 }
201
202 const int offset = (8 + sum - (sum < 0)) >> 4;
203 if (clipping_required) {
204 dst[x] = static_cast<Pixel>(
205 Clip3(pixel_value + offset, min_value, max_value));
206 } else {
207 dst[x] = static_cast<Pixel>(pixel_value + offset);
208 }
209 } while (++x < block_width);
210
211 src += src_stride;
212 dst += dst_stride;
213 } while (--y != 0);
214 }
215 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||
216 // !defined(LIBGAV1_Dsp8bpp_CdefFilters) ||
217 // (LIBGAV1_MAX_BITDEPTH >= 10 &&
218 // !defined(LIBGAV1_Dsp10bpp_CdefFilters))
219
Init8bpp()220 void Init8bpp() {
221 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
222 assert(dsp != nullptr);
223 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
224 dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
225 dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
226 dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
227 /*enable_secondary=*/false>;
228 dsp->cdef_filters[0][2] =
229 CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
230 dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
231 dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
232 /*enable_secondary=*/false>;
233 dsp->cdef_filters[1][2] =
234 CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
235 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
236 static_cast<void>(dsp);
237 #ifndef LIBGAV1_Dsp8bpp_CdefDirection
238 dsp->cdef_direction = CdefDirection_C<8, uint8_t>;
239 #endif
240 #ifndef LIBGAV1_Dsp8bpp_CdefFilters
241 dsp->cdef_filters[0][0] = CdefFilter_C<4, 8, uint8_t>;
242 dsp->cdef_filters[0][1] = CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/true,
243 /*enable_secondary=*/false>;
244 dsp->cdef_filters[0][2] =
245 CdefFilter_C<4, 8, uint8_t, /*enable_primary=*/false>;
246 dsp->cdef_filters[1][0] = CdefFilter_C<8, 8, uint8_t>;
247 dsp->cdef_filters[1][1] = CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/true,
248 /*enable_secondary=*/false>;
249 dsp->cdef_filters[1][2] =
250 CdefFilter_C<8, 8, uint8_t, /*enable_primary=*/false>;
251 #endif
252 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
253 }
254
255 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()256 void Init10bpp() {
257 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
258 assert(dsp != nullptr);
259 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
260 dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
261 dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
262 dsp->cdef_filters[0][1] =
263 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
264 /*enable_secondary=*/false>;
265 dsp->cdef_filters[0][2] =
266 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
267 dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
268 dsp->cdef_filters[1][1] =
269 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
270 /*enable_secondary=*/false>;
271 dsp->cdef_filters[1][2] =
272 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
273 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
274 static_cast<void>(dsp);
275 #ifndef LIBGAV1_Dsp10bpp_CdefDirection
276 dsp->cdef_direction = CdefDirection_C<10, uint16_t>;
277 #endif
278 #ifndef LIBGAV1_Dsp10bpp_CdefFilters
279 dsp->cdef_filters[0][0] = CdefFilter_C<4, 10, uint16_t>;
280 dsp->cdef_filters[0][1] =
281 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/true,
282 /*enable_secondary=*/false>;
283 dsp->cdef_filters[0][2] =
284 CdefFilter_C<4, 10, uint16_t, /*enable_primary=*/false>;
285 dsp->cdef_filters[1][0] = CdefFilter_C<8, 10, uint16_t>;
286 dsp->cdef_filters[1][1] =
287 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/true,
288 /*enable_secondary=*/false>;
289 dsp->cdef_filters[1][2] =
290 CdefFilter_C<8, 10, uint16_t, /*enable_primary=*/false>;
291 #endif
292 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
293 }
294 #endif
295
296 } // namespace
297
CdefInit_C()298 void CdefInit_C() {
299 Init8bpp();
300 #if LIBGAV1_MAX_BITDEPTH >= 10
301 Init10bpp();
302 #endif
303 }
304
305 } // namespace dsp
306 } // namespace libgav1
307