1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/inverse_transform.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21
22 #include "src/dsp/dsp.h"
23 #include "src/utils/array_2d.h"
24 #include "src/utils/common.h"
25 #include "src/utils/compiler_attributes.h"
26 #include "src/utils/logging.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
32 // Include the constants and utility functions inside the anonymous namespace.
33 #include "src/dsp/inverse_transform.inc"
34
// Round2() shift amount used by the column transforms in this file. The
// identity column transforms fold it into their own 12-bit fixed-point shift.
constexpr uint8_t kTransformColumnShift = 4;
36
// In fuzzing builds the transform range check is always compiled out of
// RangeCheckValue(), even if LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK was
// requested.
// NOTE(review): presumably because fuzzers feed non-conformant bitstreams that
// would otherwise trip the assert() in RangeCheckValue() -- confirm.
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
#undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
#endif
40
// Returns |value| unchanged. When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is
// defined to a nonzero value, this additionally verifies that |value| is
// representable as a signed integer using |range| bits of precision; a
// violation is logged and then trips an assert().
int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  assert(range <= 32);
  // The bounds are formed in unsigned arithmetic so that |range| == 32 does
  // not overflow a signed shift.
  const uint32_t half_span = uint32_t{1} << (range - 1);
  const auto lower_bound = static_cast<int32_t>(-half_span);
  const auto upper_bound = static_cast<int32_t>(half_span - 1);
  const bool in_range = lower_bound <= value && value <= upper_bound;
  if (!in_range) {
    LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
                 value, range);
    assert(in_range);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  // Keeps |range| "used" when the check above is compiled out.
  static_cast<void>(range);
  return value;
}
56
57 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)58 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
59 int b, int angle, bool flip,
60 int8_t range) {
61 // Note that we multiply in 32 bits and then add/subtract the products in 64
62 // bits. The 32-bit multiplications do not overflow. Please see the comment
63 // and assert() in Cos128().
64 const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
65 static_cast<int64_t>(dst[b] * Sin128(angle));
66 const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
67 static_cast<int64_t>(dst[b] * Cos128(angle));
68 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
69 // values saved into the array T by this function are representable by a
70 // signed integer using |range| bits of precision.
71 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
72 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
73 }
74
75 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)76 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
77 int angle, bool flip, int8_t range) {
78 // Note that we multiply in 32 bits and then add/subtract the products in 64
79 // bits. The 32-bit multiplications do not overflow. Please see the comment
80 // and assert() in Cos128().
81 const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
82 const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
83 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
84 // values saved into the array T by this function are representable by a
85 // signed integer using |range| bits of precision.
86 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
87 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
88 }
89
90 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)91 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
92 int angle, bool flip, int8_t range) {
93 // Note that we multiply in 32 bits and then add/subtract the products in 64
94 // bits. The 32-bit multiplications do not overflow. Please see the comment
95 // and assert() in Cos128().
96 const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
97 const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
98
99 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
100 // values saved into the array T by this function are representable by a
101 // signed integer using |range| bits of precision.
102 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
103 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
104 }
105
106 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)107 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
108 int8_t range) {
109 if (flip) std::swap(a, b);
110 --range;
111 // For Adst and Dct, the maximum possible value for range is 20. So min and
112 // max should always fit into int32_t.
113 const int32_t min = -(1 << range);
114 const int32_t max = (1 << range) - 1;
115 const int32_t x = dst[a] + dst[b];
116 const int32_t y = dst[a] - dst[b];
117 dst[a] = Clip3(x, min, max);
118 dst[b] = Clip3(y, min, max);
119 }
120
// Clamps the first |size| entries of |dst| to a signed integer of
// Max(bitdepth + 6, 16) bits. Nothing is done when Residual is 16 bits wide or
// narrower (int16_t implies bitdepth 8), because such values already fit into
// 16 bits.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  if (sizeof(Residual) <= 2) return;
  const Residual upper_bound = (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual lower_bound = -upper_bound - 1;
  for (int index = 0; index < size; ++index) {
    dst[index] = Clip3(dst[index], lower_bound, upper_bound);
  }
}
134
135 //------------------------------------------------------------------------------
136 // Discrete Cosine Transforms (DCT).
137
// Bit-reversal permutation tables, one row per 1D transform size.
//
// The value at index (i, j) is obtained by bit-reversing j and interpreting
// the result as an integer with bit-length i + 2.
// For example, index (2, 3) is computed as follows:
//   * bitreverse(3) = bitreverse(..000011) = 110000...
//   * interpreting that as an integer with bit-length 2+2 = 4 gives 1100 = 12
//
// Every row has 64 entries; rows for the smaller bit-lengths simply repeat
// their pattern (e.g. row 0 repeats {0, 2, 1, 3}).
constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
    {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
     1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
     0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
    {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
     3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
     1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
    {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
    {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
     0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
    {0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
     2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
     1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
     3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
162
163 template <typename Residual, int size_log2>
Dct_C(void * dest,int8_t range)164 void Dct_C(void* dest, int8_t range) {
165 static_assert(size_log2 >= 2 && size_log2 <= 6, "");
166 auto* const dst = static_cast<Residual*>(dest);
167 // stage 1.
168 const int size = 1 << size_log2;
169 Residual temp[size];
170 memcpy(temp, dst, sizeof(temp));
171 for (int i = 0; i < size; ++i) {
172 dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
173 }
174 // stages 2-32 are dependent on the value of size_log2.
175 // stage 2.
176 if (size_log2 == 6) {
177 for (int i = 0; i < 16; ++i) {
178 ButterflyRotation_C(dst, i + 32, 63 - i,
179 63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
180 range);
181 }
182 }
183 // stage 3
184 if (size_log2 >= 5) {
185 for (int i = 0; i < 8; ++i) {
186 ButterflyRotation_C(dst, i + 16, 31 - i,
187 6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
188 range);
189 }
190 }
191 // stage 4.
192 if (size_log2 == 6) {
193 for (int i = 0; i < 16; ++i) {
194 HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
195 static_cast<bool>(i & 1), range);
196 }
197 }
198 // stage 5.
199 if (size_log2 >= 4) {
200 for (int i = 0; i < 4; ++i) {
201 ButterflyRotation_C(dst, i + 8, 15 - i,
202 12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
203 range);
204 }
205 }
206 // stage 6.
207 if (size_log2 >= 5) {
208 for (int i = 0; i < 8; ++i) {
209 HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
210 static_cast<bool>(i & 1), range);
211 }
212 }
213 // stage 7.
214 if (size_log2 == 6) {
215 for (int i = 0; i < 4; ++i) {
216 for (int j = 0; j < 2; ++j) {
217 ButterflyRotation_C(
218 dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
219 60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
220 range);
221 }
222 }
223 }
224 // stage 8.
225 if (size_log2 >= 3) {
226 for (int i = 0; i < 2; ++i) {
227 ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
228 }
229 }
230 // stage 9.
231 if (size_log2 >= 4) {
232 for (int i = 0; i < 4; ++i) {
233 HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
234 static_cast<bool>(i & 1), range);
235 }
236 }
237 // stage 10.
238 if (size_log2 >= 5) {
239 for (int i = 0; i < 2; ++i) {
240 for (int j = 0; j < 2; ++j) {
241 ButterflyRotation_C(
242 dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
243 24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
244 }
245 }
246 }
247 // stage 11.
248 if (size_log2 == 6) {
249 for (int i = 0; i < 8; ++i) {
250 for (int j = 0; j < 2; ++j) {
251 HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
252 MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
253 range);
254 }
255 }
256 }
257 // stage 12.
258 for (int i = 0; i < 2; ++i) {
259 ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
260 i == 0, range);
261 }
262 // stage 13.
263 if (size_log2 >= 3) {
264 for (int i = 0; i < 2; ++i) {
265 HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
266 /*flip=*/i != 0, range);
267 }
268 }
269 // stage 14.
270 if (size_log2 >= 4) {
271 for (int i = 0; i < 2; ++i) {
272 ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
273 }
274 }
275 // stage 15.
276 if (size_log2 >= 5) {
277 for (int i = 0; i < 4; ++i) {
278 for (int j = 0; j < 2; ++j) {
279 HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
280 MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
281 range);
282 }
283 }
284 }
285 // stage 16.
286 if (size_log2 == 6) {
287 for (int i = 0; i < 2; ++i) {
288 for (int j = 0; j < 4; ++j) {
289 ButterflyRotation_C(
290 dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
291 56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
292 }
293 }
294 }
295 // stage 17.
296 for (int i = 0; i < 2; ++i) {
297 HadamardRotation_C(dst, i, 3 - i, false, range);
298 }
299 // stage 18.
300 if (size_log2 >= 3) {
301 ButterflyRotation_C(dst, 6, 5, 32, true, range);
302 }
303 // stage 19.
304 if (size_log2 >= 4) {
305 for (int i = 0; i < 2; ++i) {
306 for (int j = 0; j < 2; ++j) {
307 HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
308 /*flip=*/i != 0, range);
309 }
310 }
311 }
312 // stage 20.
313 if (size_log2 >= 5) {
314 for (int i = 0; i < 4; ++i) {
315 ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
316 range);
317 }
318 }
319 // stage 21.
320 if (size_log2 == 6) {
321 for (int i = 0; i < 4; ++i) {
322 for (int j = 0; j < 4; ++j) {
323 HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
324 MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
325 range);
326 }
327 }
328 }
329 // stage 22.
330 if (size_log2 >= 3) {
331 for (int i = 0; i < 4; ++i) {
332 HadamardRotation_C(dst, i, 7 - i, false, range);
333 }
334 }
335 // stage 23.
336 if (size_log2 >= 4) {
337 for (int i = 0; i < 2; ++i) {
338 ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
339 }
340 }
341 // stage 24.
342 if (size_log2 >= 5) {
343 for (int i = 0; i < 2; ++i) {
344 for (int j = 0; j < 4; ++j) {
345 HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
346 MultiplyBy8(i) - j + 23, i == 1, range);
347 }
348 }
349 }
350 // stage 25.
351 if (size_log2 == 6) {
352 for (int i = 0; i < 8; ++i) {
353 ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
354 }
355 }
356 // stage 26.
357 if (size_log2 >= 4) {
358 for (int i = 0; i < 8; ++i) {
359 HadamardRotation_C(dst, i, 15 - i, false, range);
360 }
361 }
362 // stage 27.
363 if (size_log2 >= 5) {
364 for (int i = 0; i < 4; ++i) {
365 ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
366 }
367 }
368 // stage 28.
369 if (size_log2 == 6) {
370 for (int i = 0; i < 8; ++i) {
371 HadamardRotation_C(dst, i + 32, 47 - i, false, range);
372 HadamardRotation_C(dst, i + 48, 63 - i, true, range);
373 }
374 }
375 // stage 29.
376 if (size_log2 >= 5) {
377 for (int i = 0; i < 16; ++i) {
378 HadamardRotation_C(dst, i, 31 - i, false, range);
379 }
380 }
381 // stage 30.
382 if (size_log2 == 6) {
383 for (int i = 0; i < 8; ++i) {
384 ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
385 }
386 }
387 // stage 31.
388 if (size_log2 == 6) {
389 for (int i = 0; i < 32; ++i) {
390 HadamardRotation_C(dst, i, 63 - i, false, range);
391 }
392 }
393 }
394
395 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)396 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
397 bool is_row) {
398 auto* const dst = static_cast<Residual*>(dest);
399
400 if (is_row && should_round) {
401 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
402 }
403
404 ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
405
406 if (is_row && row_shift > 0) {
407 dst[0] = RightShiftWithRounding(dst[0], row_shift);
408 }
409
410 ClampIntermediate<bitdepth, Residual>(dst, 1);
411
412 const int size = 1 << size_log2;
413 for (int i = 1; i < size; ++i) {
414 dst[i] = dst[0];
415 }
416 }
417
418 //------------------------------------------------------------------------------
419 // Asymmetric Discrete Sine Transforms (ADST).
420
421 /*
422 * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
423 * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
424 */
425 template <typename Residual>
Adst4_C(void * dest,int8_t range)426 void Adst4_C(void* dest, int8_t range) {
427 auto* const dst = static_cast<Residual*>(dest);
428 if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
429 return;
430 }
431
432 // stage 1.
433 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
434 // values stored in the s and x arrays by this process are representable by
435 // a signed integer using range + 12 bits of precision.
436 int32_t s[7];
437 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
438 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
439 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
440 s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
441 s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
442 s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
443 s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
444 // stage 2.
445 // Section 7.13.2.6: It is a requirement of bitstream conformance that
446 // values stored in the variable a7 by this process are representable by a
447 // signed integer using range + 1 bits of precision.
448 const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
449 // Section 7.13.2.6: It is a requirement of bitstream conformance that
450 // values stored in the variable b7 by this process are representable by a
451 // signed integer using |range| bits of precision.
452 const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
453 // stage 3.
454 s[0] = RangeCheckValue(s[0] + s[3], range + 12);
455 s[1] = RangeCheckValue(s[1] - s[4], range + 12);
456 s[3] = s[2];
457 s[2] = RangeCheckValue(kAdst4Multiplier[2] * b7, range + 12);
458 // stage 4.
459 s[0] = RangeCheckValue(s[0] + s[5], range + 12);
460 s[1] = RangeCheckValue(s[1] - s[6], range + 12);
461 // stages 5 and 6.
462 const int32_t x0 = RangeCheckValue(s[0] + s[3], range + 12);
463 const int32_t x1 = RangeCheckValue(s[1] + s[3], range + 12);
464 int32_t x3 = RangeCheckValue(s[0] + s[1], range + 12);
465 x3 = RangeCheckValue(x3 - s[3], range + 12);
466 int32_t dst_0 = RightShiftWithRounding(x0, 12);
467 int32_t dst_1 = RightShiftWithRounding(x1, 12);
468 int32_t dst_2 = RightShiftWithRounding(s[2], 12);
469 int32_t dst_3 = RightShiftWithRounding(x3, 12);
470 if (sizeof(Residual) == 2) {
471 // If the first argument to RightShiftWithRounding(..., 12) is only
472 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
473 // in RightShiftWithRounding(..., 12) will cause the function to return
474 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
475 dst_0 -= (dst_0 == 0x8000);
476 dst_1 -= (dst_1 == 0x8000);
477 dst_3 -= (dst_3 == 0x8000);
478 }
479 dst[0] = dst_0;
480 dst[1] = dst_1;
481 dst[2] = dst_2;
482 dst[3] = dst_3;
483 }
484
485 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)486 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
487 bool is_row) {
488 auto* const dst = static_cast<Residual*>(dest);
489
490 if (is_row && should_round) {
491 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
492 }
493
494 // stage 1.
495 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
496 // values stored in the s and x arrays by this process are representable by
497 // a signed integer using range + 12 bits of precision.
498 int32_t s[3];
499 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
500 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
501 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
502 // stage 3.
503 // stage 4.
504 // stages 5 and 6.
505 int32_t dst_0 = RightShiftWithRounding(s[0], 12);
506 int32_t dst_1 = RightShiftWithRounding(s[1], 12);
507 int32_t dst_2 = RightShiftWithRounding(s[2], 12);
508 int32_t dst_3 =
509 RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
510 if (sizeof(Residual) == 2) {
511 // If the first argument to RightShiftWithRounding(..., 12) is only
512 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
513 // in RightShiftWithRounding(..., 12) will cause the function to return
514 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
515 dst_0 -= (dst_0 == 0x8000);
516 dst_1 -= (dst_1 == 0x8000);
517 dst_3 -= (dst_3 == 0x8000);
518 }
519 dst[0] = dst_0;
520 dst[1] = dst_1;
521 dst[2] = dst_2;
522 dst[3] = dst_3;
523
524 const int size = 4;
525 if (is_row && row_shift > 0) {
526 for (int j = 0; j < size; ++j) {
527 dst[j] = RightShiftWithRounding(dst[j], row_shift);
528 }
529 }
530
531 ClampIntermediate<bitdepth, Residual>(dst, 4);
532 }
533
534 template <typename Residual>
AdstInputPermutation(int32_t * LIBGAV1_RESTRICT const dst,const Residual * LIBGAV1_RESTRICT const src,int n)535 void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
536 const Residual* LIBGAV1_RESTRICT const src, int n) {
537 assert(n == 8 || n == 16);
538 for (int i = 0; i < n; ++i) {
539 dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
540 }
541 }
542
// Source index for each output position of the 16-point ADST output
// permutation. AdstOutputPermutation() also uses it for the 8-point ADST by
// reading the first 8 entries and shifting each one right by 1.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};
545
546 template <typename Residual>
AdstOutputPermutation(Residual * LIBGAV1_RESTRICT const dst,const int32_t * LIBGAV1_RESTRICT const src,int n)547 void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
548 const int32_t* LIBGAV1_RESTRICT const src, int n) {
549 assert(n == 8 || n == 16);
550 const auto shift = static_cast<int8_t>(n == 8);
551 for (int i = 0; i < n; ++i) {
552 const int8_t index = kAdstOutputPermutationLookup[i] >> shift;
553 int32_t dst_i = ((i & 1) == 0) ? src[index] : -src[index];
554 if (sizeof(Residual) == 2) {
555 // If i is odd and src[index] is -32768, dst_i will be 32768, which
556 // cannot be represented as an int16_t.
557 dst_i -= (dst_i == 0x8000);
558 }
559 dst[i] = dst_i;
560 }
561 }
562
563 template <typename Residual>
Adst8_C(void * dest,int8_t range)564 void Adst8_C(void* dest, int8_t range) {
565 auto* const dst = static_cast<Residual*>(dest);
566 // stage 1.
567 int32_t temp[8];
568 AdstInputPermutation(temp, dst, 8);
569 // stage 2.
570 for (int i = 0; i < 4; ++i) {
571 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
572 true, range);
573 }
574 // stage 3.
575 for (int i = 0; i < 4; ++i) {
576 HadamardRotation_C(temp, i, i + 4, false, range);
577 }
578 // stage 4.
579 for (int i = 0; i < 2; ++i) {
580 ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
581 }
582 // stage 5.
583 for (int i = 0; i < 2; ++i) {
584 for (int j = 0; j < 2; ++j) {
585 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
586 false, range);
587 }
588 }
589 // stage 6.
590 for (int i = 0; i < 2; ++i) {
591 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
592 range);
593 }
594 // stage 7.
595 AdstOutputPermutation(dst, temp, 8);
596 }
597
598 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)599 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
600 bool is_row) {
601 auto* const dst = static_cast<Residual*>(dest);
602
603 // stage 1.
604 int32_t temp[8];
605 // After the permutation, the dc value is in temp[1]. The remaining are zero.
606 AdstInputPermutation(temp, dst, 8);
607
608 if (is_row && should_round) {
609 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
610 }
611
612 // stage 2.
613 ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
614
615 // stage 3.
616 temp[4] = temp[0];
617 temp[5] = temp[1];
618
619 // stage 4.
620 ButterflyRotation_C(temp, 4, 5, 48, true, range);
621
622 // stage 5.
623 temp[2] = temp[0];
624 temp[3] = temp[1];
625 temp[6] = temp[4];
626 temp[7] = temp[5];
627
628 // stage 6.
629 ButterflyRotation_C(temp, 2, 3, 32, true, range);
630 ButterflyRotation_C(temp, 6, 7, 32, true, range);
631
632 // stage 7.
633 AdstOutputPermutation(dst, temp, 8);
634
635 const int size = 8;
636 if (is_row && row_shift > 0) {
637 for (int j = 0; j < size; ++j) {
638 dst[j] = RightShiftWithRounding(dst[j], row_shift);
639 }
640 }
641
642 ClampIntermediate<bitdepth, Residual>(dst, 8);
643 }
644
645 template <typename Residual>
Adst16_C(void * dest,int8_t range)646 void Adst16_C(void* dest, int8_t range) {
647 auto* const dst = static_cast<Residual*>(dest);
648 // stage 1.
649 int32_t temp[16];
650 AdstInputPermutation(temp, dst, 16);
651 // stage 2.
652 for (int i = 0; i < 8; ++i) {
653 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
654 true, range);
655 }
656 // stage 3.
657 for (int i = 0; i < 8; ++i) {
658 HadamardRotation_C(temp, i, i + 8, false, range);
659 }
660 // stage 4.
661 for (int i = 0; i < 2; ++i) {
662 ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
663 56 - 32 * i, true, range);
664 ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
665 8 + 32 * i, true, range);
666 }
667 // stage 5.
668 for (int i = 0; i < 4; ++i) {
669 for (int j = 0; j < 2; ++j) {
670 HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
671 false, range);
672 }
673 }
674 // stage 6.
675 for (int i = 0; i < 2; ++i) {
676 for (int j = 0; j < 2; ++j) {
677 ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
678 i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
679 }
680 }
681 // stage 7.
682 for (int i = 0; i < 2; ++i) {
683 for (int j = 0; j < 4; ++j) {
684 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
685 false, range);
686 }
687 }
688 // stage 8.
689 for (int i = 0; i < 4; ++i) {
690 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
691 range);
692 }
693 // stage 9.
694 AdstOutputPermutation(dst, temp, 16);
695 }
696
697 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)698 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
699 bool is_row) {
700 auto* const dst = static_cast<Residual*>(dest);
701
702 // stage 1.
703 int32_t temp[16];
704 // After the permutation, the dc value is in temp[1]. The remaining are zero.
705 AdstInputPermutation(temp, dst, 16);
706
707 if (is_row && should_round) {
708 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
709 }
710
711 // stage 2.
712 ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
713
714 // stage 3.
715 temp[8] = temp[0];
716 temp[9] = temp[1];
717
718 // stage 4.
719 ButterflyRotation_C(temp, 8, 9, 56, true, range);
720
721 // stage 5.
722 temp[4] = temp[0];
723 temp[5] = temp[1];
724 temp[12] = temp[8];
725 temp[13] = temp[9];
726
727 // stage 6.
728 ButterflyRotation_C(temp, 4, 5, 48, true, range);
729 ButterflyRotation_C(temp, 12, 13, 48, true, range);
730
731 // stage 7.
732 temp[2] = temp[0];
733 temp[3] = temp[1];
734 temp[10] = temp[8];
735 temp[11] = temp[9];
736
737 temp[6] = temp[4];
738 temp[7] = temp[5];
739 temp[14] = temp[12];
740 temp[15] = temp[13];
741
742 // stage 8.
743 for (int i = 0; i < 4; ++i) {
744 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
745 range);
746 }
747
748 // stage 9.
749 AdstOutputPermutation(dst, temp, 16);
750
751 const int size = 16;
752 if (is_row && row_shift > 0) {
753 for (int j = 0; j < size; ++j) {
754 dst[j] = RightShiftWithRounding(dst[j], row_shift);
755 }
756 }
757
758 ClampIntermediate<bitdepth, Residual>(dst, 16);
759 }
760
761 //------------------------------------------------------------------------------
762 // Identity Transforms.
763 //
764 // In the spec, the inverse identity transform is followed by a Round2() call:
765 // The row transforms with i = 0..(h-1) are applied as follows:
766 // ...
767 // * Otherwise, invoke the inverse identity transform process specified in
768 // section 7.13.2.15 with the input variable n equal to log2W.
769 // * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
770 // for j = 0..(w-1).
771 // ...
772 // The column transforms with j = 0..(w-1) are applied as follows:
773 // ...
774 // * Otherwise, invoke the inverse identity transform process specified in
775 // section 7.13.2.15 with the input variable n equal to log2H.
776 // * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
777 // for i = 0..(h-1).
778 //
779 // Therefore, we define the identity transform functions to perform both the
780 // inverse identity transform and the Round2() call. This has two advantages:
781 // 1. The outputs of the inverse identity transform do not need to be stored
782 // in the Residual array. They can be stored in int32_t local variables,
783 // which have a larger range if Residual is an int16_t array.
784 // 2. The inverse identity transform and the Round2() call can be jointly
785 // optimized.
786 //
787 // The identity transform functions have the following prototype:
788 // void Identity_C(void* dest, int8_t shift);
789 //
790 // The |shift| parameter is the amount of shift for the Round2() call. For row
791 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
792 // 4. Therefore, an identity transform function can detect whether it is being
793 // invoked as a row transform or a column transform by checking whether |shift|
794 // is equal to 4.
795 //
796 // Input Range
797 //
798 // The inputs of row transforms, stored in the 2D array Dequant, are
799 // representable by a signed integer using 8 + BitDepth bits of precision:
800 // f. Dequant[ i ][ j ] is set equal to
801 // Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
802 //
803 // The inputs of column transforms are representable by a signed integer using
804 // Max( BitDepth + 6, 16 ) bits of precision:
805 // Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
806 // ...
807 // Between the row and column transforms, Residual[ i ][ j ] is set equal to
808 // Clip3( - ( 1 << ( colClampRange - 1 ) ),
809 // ( 1 << (colClampRange - 1 ) ) - 1,
810 // Residual[ i ][ j ] )
811 // for i = 0..(h-1), for j = 0..(w-1).
812 //
813 // Output Range
814 //
815 // The outputs of row transforms are representable by a signed integer using
816 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
817 // of the multiplicative factor of inverse identity transforms minus the
818 // smallest row shift is an increase of at most one bit.
819 //
820 // Transform | Multiplicative factor | Smallest row | Net increase
821 // width | (in bits) | shift | in bits
822 // ---------------------------------------------------------------
823 // 4 | sqrt(2) (0.5 bits) | 0 | +0.5
824 // 8 | 2 (1 bit) | 0 | +1
825 // 16 | 2*sqrt(2) (1.5 bits) | 1 | +0.5
826 // 32 | 4 (2 bits) | 1 | +1
827 //
828 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
829 // clip the outputs (which have 17 bits of precision) to the range of int16_t
830 // before storing them in the Residual array. This clipping happens to be the
831 // same as the required clipping after the row transform (see the spec quoted
832 // above), so we remain compliant with the spec. (In this case,
833 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
834 // duplication of effort.)
835 //
836 // The outputs of column transforms are representable by a signed integer using
837 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
838 // because the multiplicative factor of inverse identity transforms is at most
839 // 4 (2 bits) and |shift| is always 4.
840
841 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)842 void Identity4Row_C(void* dest, int8_t shift) {
843 assert(shift == 0 || shift == 1);
844 auto* const dst = static_cast<Residual*>(dest);
845 // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
846 // should be (1 + (1 << 1)) << 11. The following expression works for both
847 // values of |shift|.
848 const int32_t rounding = (1 + (shift << 1)) << 11;
849 for (int i = 0; i < 4; ++i) {
850 // The intermediate value here will have to fit into an int32_t for it to be
851 // bitstream conformant. The multiplication is promoted to int32_t by
852 // defining kIdentity4Multiplier as int32_t.
853 int32_t dst_i = (dst[i] * kIdentity4Multiplier + rounding) >> (12 + shift);
854 if (sizeof(Residual) == 2) {
855 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
856 }
857 dst[i] = static_cast<Residual>(dst_i);
858 }
859 }
860
861 template <typename Residual>
Identity4Column_C(void * dest,int8_t)862 void Identity4Column_C(void* dest, int8_t /*shift*/) {
863 auto* const dst = static_cast<Residual*>(dest);
864 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
865 for (int i = 0; i < 4; ++i) {
866 // The intermediate value here will have to fit into an int32_t for it to be
867 // bitstream conformant. The multiplication is promoted to int32_t by
868 // defining kIdentity4Multiplier as int32_t.
869 dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
870 (12 + kTransformColumnShift));
871 }
872 }
873
874 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)875 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
876 int row_shift, bool is_row) {
877 auto* const dst = static_cast<Residual*>(dest);
878
879 if (is_row) {
880 if (should_round) {
881 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
882 }
883
884 const int32_t rounding = (1 + (row_shift << 1)) << 11;
885 int32_t dst_i =
886 (dst[0] * kIdentity4Multiplier + rounding) >> (12 + row_shift);
887 if (sizeof(Residual) == 2) {
888 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
889 }
890 dst[0] = static_cast<Residual>(dst_i);
891
892 ClampIntermediate<bitdepth, Residual>(dst, 1);
893 return;
894 }
895
896 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
897 dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
898 (12 + kTransformColumnShift));
899 }
900
901 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)902 void Identity8Row_C(void* dest, int8_t shift) {
903 assert(shift == 0 || shift == 1 || shift == 2);
904 auto* const dst = static_cast<Residual*>(dest);
905 for (int i = 0; i < 8; ++i) {
906 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
907 if (sizeof(Residual) == 2) {
908 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
909 }
910 dst[i] = static_cast<Residual>(dst_i);
911 }
912 }
913
914 template <typename Residual>
Identity8Column_C(void * dest,int8_t)915 void Identity8Column_C(void* dest, int8_t /*shift*/) {
916 auto* const dst = static_cast<Residual*>(dest);
917 for (int i = 0; i < 8; ++i) {
918 dst[i] = static_cast<Residual>(
919 RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
920 }
921 }
922
923 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)924 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
925 int row_shift, bool is_row) {
926 auto* const dst = static_cast<Residual*>(dest);
927
928 if (is_row) {
929 if (should_round) {
930 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
931 }
932
933 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
934 if (sizeof(Residual) == 2) {
935 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
936 }
937 dst[0] = static_cast<Residual>(dst_i);
938
939 // If Residual is int16_t (which implies bitdepth is 8), we don't need to
940 // clip residual[i][j] to 16 bits.
941 if (sizeof(Residual) > 2) {
942 const Residual intermediate_clamp_max =
943 (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
944 const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
945 dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
946 }
947 return;
948 }
949
950 dst[0] = static_cast<Residual>(
951 RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
952 }
953
954 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)955 void Identity16Row_C(void* dest, int8_t shift) {
956 assert(shift == 1 || shift == 2);
957 auto* const dst = static_cast<Residual*>(dest);
958 const int32_t rounding = (1 + (1 << shift)) << 11;
959 for (int i = 0; i < 16; ++i) {
960 // The intermediate value here will have to fit into an int32_t for it to be
961 // bitstream conformant. The multiplication is promoted to int32_t by
962 // defining kIdentity16Multiplier as int32_t.
963 int32_t dst_i = (dst[i] * kIdentity16Multiplier + rounding) >> (12 + shift);
964 if (sizeof(Residual) == 2) {
965 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
966 }
967 dst[i] = static_cast<Residual>(dst_i);
968 }
969 }
970
971 template <typename Residual>
Identity16Column_C(void * dest,int8_t)972 void Identity16Column_C(void* dest, int8_t /*shift*/) {
973 auto* const dst = static_cast<Residual*>(dest);
974 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
975 for (int i = 0; i < 16; ++i) {
976 // The intermediate value here will have to fit into an int32_t for it to be
977 // bitstream conformant. The multiplication is promoted to int32_t by
978 // defining kIdentity16Multiplier as int32_t.
979 dst[i] =
980 static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
981 (12 + kTransformColumnShift));
982 }
983 }
984
985 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)986 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
987 int row_shift, bool is_row) {
988 auto* const dst = static_cast<Residual*>(dest);
989
990 if (is_row) {
991 if (should_round) {
992 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
993 }
994
995 const int32_t rounding = (1 + (1 << row_shift)) << 11;
996 int32_t dst_i =
997 (dst[0] * kIdentity16Multiplier + rounding) >> (12 + row_shift);
998 if (sizeof(Residual) == 2) {
999 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1000 }
1001 dst[0] = static_cast<Residual>(dst_i);
1002
1003 ClampIntermediate<bitdepth, Residual>(dst, 1);
1004 return;
1005 }
1006
1007 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1008 dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1009 (12 + kTransformColumnShift));
1010 }
1011
1012 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1013 void Identity32Row_C(void* dest, int8_t shift) {
1014 assert(shift == 1 || shift == 2);
1015 auto* const dst = static_cast<Residual*>(dest);
1016 for (int i = 0; i < 32; ++i) {
1017 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1018 if (sizeof(Residual) == 2) {
1019 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1020 }
1021 dst[i] = static_cast<Residual>(dst_i);
1022 }
1023 }
1024
1025 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1026 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1027 auto* const dst = static_cast<Residual*>(dest);
1028 for (int i = 0; i < 32; ++i) {
1029 dst[i] = static_cast<Residual>(
1030 RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1031 }
1032 }
1033
1034 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1035 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1036 int row_shift, bool is_row) {
1037 auto* const dst = static_cast<Residual*>(dest);
1038
1039 if (is_row) {
1040 if (should_round) {
1041 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
1042 }
1043
1044 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1045 if (sizeof(Residual) == 2) {
1046 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1047 }
1048 dst[0] = static_cast<Residual>(dst_i);
1049
1050 ClampIntermediate<bitdepth, Residual>(dst, 1);
1051 return;
1052 }
1053
1054 dst[0] = static_cast<Residual>(
1055 RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1056 }
1057
1058 //------------------------------------------------------------------------------
1059 // Walsh Hadamard Transform.
1060
// Inverse 4-point Walsh-Hadamard transform, applied in place to the four
// Residual values at |dest|. Every input is first shifted right by |shift|.
// All intermediate values are stored as Residual, so they wrap to the width of
// Residual exactly as the stores to the output do.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  auto* const coeffs = static_cast<Residual*>(dest);
  // Load the pre-shifted inputs in the permuted order the butterflies use.
  Residual a = coeffs[0] >> shift;
  Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  Residual b = coeffs[3] >> shift;
  a += c;
  d -= b;
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;
  const Residual out1 = e - b;
  const Residual out2 = e - c;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[0] = a - out1;
  coeffs[3] = d + out2;
}
1078
1079 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1080 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1081 int /*row_shift*/, bool /*is_row*/) {
1082 auto* const dst = static_cast<Residual*>(dest);
1083 const int shift = range;
1084
1085 Residual temp = dst[0] >> shift;
1086 // This signed right shift must be an arithmetic shift.
1087 Residual e = temp >> 1;
1088 dst[0] = temp - e;
1089 dst[1] = e;
1090 dst[2] = e;
1091 dst[3] = e;
1092
1093 ClampIntermediate<bitdepth, Residual>(dst, 4);
1094 }
1095
1096 //------------------------------------------------------------------------------
1097 // row/column transform loop
1098
// Signature of a full 1D inverse transform kernel that works in place on one
// row or column of coefficients.
//   dst:   the coefficient array, passed as void*.
//   range: kernel-specific argument. TransformLoop_C() passes the row/column
//          shift to identity kernels and the row/column clamp range to all
//          other kernels.
using InverseTransform1dFunc = void (*)(void* /*dst*/, int8_t /*range*/);

// Signature of the DC-only counterpart of a 1D kernel.
//   dest:         the coefficient array, passed as void*.
//   range:        the row/column clamp range from TransformLoop_C().
//   should_round: whether the input is pre-scaled by kTransformRowMultiplier
//                 (row pass only).
//   row_shift:    the row shift (row pass only).
//   is_row:       true for the row pass, false for the column pass.
using InverseTransformDcOnlyFunc = void (*)(void* /*dest*/, int8_t /*range*/,
                                            bool /*should_round*/,
                                            int /*row_shift*/,
                                            bool /*is_row*/);
1103
1104 template <int bitdepth, typename Residual, typename Pixel,
1105 Transform1d transform1d_type,
1106 InverseTransformDcOnlyFunc dconly_transform1d,
1107 InverseTransform1dFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * LIBGAV1_RESTRICT src_buffer,int start_x,int start_y,void * LIBGAV1_RESTRICT dst_frame)1108 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1109 int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
1110 int start_x, int start_y,
1111 void* LIBGAV1_RESTRICT dst_frame) {
1112 constexpr bool lossless = transform1d_type == kTransform1dWht;
1113 constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
1114 // The transform size of the WHT is always 4x4. Setting tx_width and
1115 // tx_height to the constant 4 for the WHT speeds the code up.
1116 assert(!lossless || tx_size == kTransformSize4x4);
1117 const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1118 const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1119 const int tx_width_log2 = kTransformWidthLog2[tx_size];
1120 const int tx_height_log2 = kTransformHeightLog2[tx_size];
1121 auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1122
1123 // Initially this points to the dequantized values. After the transforms are
1124 // applied, this buffer contains the residual.
1125 Array2DView<Residual> residual(tx_height, tx_width,
1126 static_cast<Residual*>(src_buffer));
1127
1128 if (is_row) {
1129 // Row transform.
1130 const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1131 // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1132 // transforms, this will be equal to the clamping range.
1133 const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1134 // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1135 // the input to the row transform by 1 / sqrt(2), which is approximated by
1136 // the fraction 2896 / 2^12.
1137 const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1138
1139 if (adjusted_tx_height == 1) {
1140 dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1141 true);
1142 return;
1143 }
1144
1145 // Row transforms need to be done only up to 32 because the rest of the rows
1146 // are always all zero if |tx_height| is 64. Otherwise, only process the
1147 // rows that have a non zero coefficients.
1148 for (int i = 0; i < adjusted_tx_height; ++i) {
1149 // If lossless, the transform size is 4x4, so should_round is false.
1150 if (!lossless && should_round) {
1151 // The last 32 values of every row are always zero if the |tx_width| is
1152 // 64.
1153 for (int j = 0; j < std::min(tx_width, 32); ++j) {
1154 residual[i][j] = RightShiftWithRounding(
1155 residual[i][j] * kTransformRowMultiplier, 12);
1156 }
1157 }
1158 // For identity transform, |transform1d_func| also performs the
1159 // Round2(T[j], rowShift) call in the spec.
1160 transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1161 if (!lossless && !is_identity && row_shift > 0) {
1162 for (int j = 0; j < tx_width; ++j) {
1163 residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1164 }
1165 }
1166
1167 ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1168 }
1169 return;
1170 }
1171
1172 assert(!is_row);
1173 constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1174 // This is the |range| parameter of the InverseTransform1dFunc. For lossy
1175 // transforms, this will be equal to the clamping range.
1176 const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1177 const bool flip_rows = transform1d_type == kTransform1dAdst &&
1178 kTransformFlipRowsMask.Contains(tx_type);
1179 const bool flip_columns =
1180 !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1181 const int min_value = 0;
1182 const int max_value = (1 << bitdepth) - 1;
1183 // Note: 64 is the maximum size of a 1D transform buffer (the largest
1184 // transform size is kTransformSize64x64).
1185 Residual tx_buffer[64];
1186 for (int j = 0; j < tx_width; ++j) {
1187 const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1188 int i = 0;
1189 do {
1190 tx_buffer[i] = residual[i][flipped_j];
1191 } while (++i != tx_height);
1192 if (adjusted_tx_height == 1) {
1193 dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1194 } else {
1195 // For identity transform, |transform1d_func| also performs the
1196 // Round2(T[i], colShift) call in the spec.
1197 transform1d_func(tx_buffer,
1198 is_identity ? column_shift : column_clamp_range);
1199 }
1200 const int x = start_x + j;
1201 for (int i = 0; i < tx_height; ++i) {
1202 const int y = start_y + i;
1203 const int index = flip_rows ? tx_height - i - 1 : i;
1204 Residual residual_value = tx_buffer[index];
1205 if (!lossless && !is_identity) {
1206 residual_value = RightShiftWithRounding(residual_value, column_shift);
1207 }
1208 (*frame)[y][x] =
1209 Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1210 }
1211 }
1212 }
1213
1214 //------------------------------------------------------------------------------
1215
1216 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1217 template <int bitdepth, typename Residual, typename Pixel>
InitAll(Dsp * const dsp)1218 void InitAll(Dsp* const dsp) {
1219 // Maximum transform size for Dct is 64.
1220 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1221 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1222 DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1223 /*is_row=*/true>;
1224 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1225 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1226 DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1227 /*is_row=*/false>;
1228 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1229 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1230 DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1231 /*is_row=*/true>;
1232 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1233 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1234 DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1235 /*is_row=*/false>;
1236 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1237 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1238 DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1239 /*is_row=*/true>;
1240 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1241 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1242 DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1243 /*is_row=*/false>;
1244 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1245 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1246 DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1247 /*is_row=*/true>;
1248 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1249 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1250 DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1251 /*is_row=*/false>;
1252 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1253 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1254 DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1255 /*is_row=*/true>;
1256 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1257 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1258 DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1259 /*is_row=*/false>;
1260
1261 // Maximum transform size for Adst is 16.
1262 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1263 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1264 Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1265 /*is_row=*/true>;
1266 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1267 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1268 Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1269 /*is_row=*/false>;
1270 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1271 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1272 Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1273 /*is_row=*/true>;
1274 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1275 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1276 Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1277 /*is_row=*/false>;
1278 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1279 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1280 Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1281 /*is_row=*/true>;
1282 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1283 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1284 Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1285 /*is_row=*/false>;
1286
1287 // Maximum transform size for Identity transform is 32.
1288 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1289 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1290 Identity4DcOnly_C<bitdepth, Residual>,
1291 Identity4Row_C<Residual>, /*is_row=*/true>;
1292 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1293 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1294 Identity4DcOnly_C<bitdepth, Residual>,
1295 Identity4Column_C<Residual>, /*is_row=*/false>;
1296 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1297 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1298 Identity8DcOnly_C<bitdepth, Residual>,
1299 Identity8Row_C<Residual>, /*is_row=*/true>;
1300 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1301 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1302 Identity8DcOnly_C<bitdepth, Residual>,
1303 Identity8Column_C<Residual>, /*is_row=*/false>;
1304 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1305 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1306 Identity16DcOnly_C<bitdepth, Residual>,
1307 Identity16Row_C<Residual>, /*is_row=*/true>;
1308 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1309 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1310 Identity16DcOnly_C<bitdepth, Residual>,
1311 Identity16Column_C<Residual>, /*is_row=*/false>;
1312 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1313 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1314 Identity32DcOnly_C<bitdepth, Residual>,
1315 Identity32Row_C<Residual>, /*is_row=*/true>;
1316 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1317 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1318 Identity32DcOnly_C<bitdepth, Residual>,
1319 Identity32Column_C<Residual>, /*is_row=*/false>;
1320
1321 // Maximum transform size for Wht is 4.
1322 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1323 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1324 Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1325 /*is_row=*/true>;
1326 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1327 TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1328 Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1329 /*is_row=*/false>;
1330 }
1331 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1332
Init8bpp()1333 void Init8bpp() {
1334 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1335 assert(dsp != nullptr);
1336 static_cast<void>(dsp);
1337 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1338 InitAll<8, int16_t, uint8_t>(dsp);
1339 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1340 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
1341 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1342 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1343 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1344 /*is_row=*/true>;
1345 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1346 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1347 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1348 /*is_row=*/false>;
1349 #endif
1350 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
1351 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1352 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1353 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1354 /*is_row=*/true>;
1355 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1356 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1357 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1358 /*is_row=*/false>;
1359 #endif
1360 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
1361 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1362 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1363 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1364 /*is_row=*/true>;
1365 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1366 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1367 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1368 /*is_row=*/false>;
1369 #endif
1370 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
1371 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1372 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1373 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1374 /*is_row=*/true>;
1375 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1376 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1377 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1378 /*is_row=*/false>;
1379 #endif
1380 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
1381 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1382 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1383 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1384 /*is_row=*/true>;
1385 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1386 TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1387 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1388 /*is_row=*/false>;
1389 #endif
1390 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
1391 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1392 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1393 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1394 /*is_row=*/true>;
1395 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1396 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1397 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1398 /*is_row=*/false>;
1399 #endif
1400 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
1401 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1402 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1403 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1404 /*is_row=*/true>;
1405 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1406 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1407 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1408 /*is_row=*/false>;
1409 #endif
1410 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
1411 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1412 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1413 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1414 /*is_row=*/true>;
1415 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1416 TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1417 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1418 /*is_row=*/false>;
1419 #endif
1420 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
1421 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1422 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1423 Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1424 /*is_row=*/true>;
1425 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1426 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1427 Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1428 /*is_row=*/false>;
1429 #endif
1430 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
1431 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1432 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1433 Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1434 /*is_row=*/true>;
1435 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1436 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1437 Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1438 /*is_row=*/false>;
1439 #endif
1440 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
1441 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1442 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1443 Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1444 /*is_row=*/true>;
1445 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1446 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1447 Identity16DcOnly_C<8, int16_t>,
1448 Identity16Column_C<int16_t>, /*is_row=*/false>;
1449 #endif
1450 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
1451 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1452 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1453 Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1454 /*is_row=*/true>;
1455 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1456 TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1457 Identity32DcOnly_C<8, int16_t>,
1458 Identity32Column_C<int16_t>, /*is_row=*/false>;
1459 #endif
1460 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
1461 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1462 TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1463 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1464 /*is_row=*/true>;
1465 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1466 TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1467 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1468 /*is_row=*/false>;
1469 #endif
1470 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1471 }
1472
1473 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1474 void Init10bpp() {
1475 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1476 assert(dsp != nullptr);
1477 static_cast<void>(dsp);
1478 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1479 InitAll<10, int32_t, uint16_t>(dsp);
1480 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1481 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
1482 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1483 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1484 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1485 /*is_row=*/true>;
1486 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1487 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1488 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1489 /*is_row=*/false>;
1490 #endif
1491 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
1492 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1493 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1494 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1495 /*is_row=*/true>;
1496 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1497 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1498 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1499 /*is_row=*/false>;
1500 #endif
1501 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
1502 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1503 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1504 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1505 /*is_row=*/true>;
1506 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1507 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1508 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1509 /*is_row=*/false>;
1510 #endif
1511 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
1512 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1513 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1514 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1515 /*is_row=*/true>;
1516 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1517 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1518 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1519 /*is_row=*/false>;
1520 #endif
1521 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
1522 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1523 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1524 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1525 /*is_row=*/true>;
1526 dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1527 TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1528 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1529 /*is_row=*/false>;
1530 #endif
1531 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
1532 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1533 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1534 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1535 /*is_row=*/true>;
1536 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1537 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1538 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1539 /*is_row=*/false>;
1540 #endif
1541 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
1542 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1543 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1544 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1545 /*is_row=*/true>;
1546 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1547 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1548 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1549 /*is_row=*/false>;
1550 #endif
1551 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
1552 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1553 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1554 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1555 /*is_row=*/true>;
1556 dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1557 TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1558 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1559 /*is_row=*/false>;
1560 #endif
1561 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
1562 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1563 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1564 Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1565 /*is_row=*/true>;
1566 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1567 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1568 Identity4DcOnly_C<10, int32_t>,
1569 Identity4Column_C<int32_t>, /*is_row=*/false>;
1570 #endif
1571 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
1572 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1573 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1574 Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1575 /*is_row=*/true>;
1576 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1577 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1578 Identity8DcOnly_C<10, int32_t>,
1579 Identity8Column_C<int32_t>, /*is_row=*/false>;
1580 #endif
1581 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
1582 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1583 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1584 Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1585 /*is_row=*/true>;
1586 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1587 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1588 Identity16DcOnly_C<10, int32_t>,
1589 Identity16Column_C<int32_t>, /*is_row=*/false>;
1590 #endif
1591 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
1592 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1593 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1594 Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1595 /*is_row=*/true>;
1596 dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1597 TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1598 Identity32DcOnly_C<10, int32_t>,
1599 Identity32Column_C<int32_t>, /*is_row=*/false>;
1600 #endif
1601 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
1602 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1603 TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1604 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1605 /*is_row=*/true>;
1606 dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1607 TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1608 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1609 /*is_row=*/false>;
1610 #endif
1611 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1612 }
1613 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1614
1615 } // namespace
1616
InverseTransformInit_C()1617 void InverseTransformInit_C() {
1618 Init8bpp();
1619 #if LIBGAV1_MAX_BITDEPTH >= 10
1620 Init10bpp();
1621 #endif
1622
1623 // Local functions that may be unused depending on the optimizations
1624 // available.
1625 static_cast<void>(RangeCheckValue);
1626 static_cast<void>(kBitReverseLookup);
1627 }
1628
1629 } // namespace dsp
1630 } // namespace libgav1
1631