1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/inverse_transform.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21
22 #include "src/dsp/dsp.h"
23 #include "src/utils/array_2d.h"
24 #include "src/utils/common.h"
25 #include "src/utils/compiler_attributes.h"
26 #include "src/utils/logging.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
32 // Include the constants and utility functions inside the anonymous namespace.
33 #include "src/dsp/inverse_transform.inc"
34
// Amount of right shift (the Round2() shift) used for column transforms. The
// identity column transforms in this file fold it into their own rounding and
// shifting.
constexpr uint8_t kTransformColumnShift = 4;

// Fuzzing builds always run with the transform range check disabled, no matter
// how LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK was configured.
// NOTE(review): presumably because fuzzed (non-conformant) bitstreams would
// otherwise trip the assert() in RangeCheckValue() -- confirm.
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
#undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
#endif
40
// Debug aid for the bitstream conformance requirements on intermediate
// transform values.
//
// When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined to a nonzero value,
// checks that |value| is representable by a signed integer using |range| bits
// of precision; a violation is logged and then trips an assert(). In every
// other build the check compiles away.
//
// Returns |value| unchanged in all configurations.
int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  assert(range >= 1 && range <= 32);
  // |range| may legitimately be 32 (callers pass |range| + 12), so the bounds
  // are computed in 64 bits: 1 << 31 does not fit in a 32-bit int.
  const int64_t max = (int64_t{1} << (range - 1)) - 1;
  const int64_t min = -max - 1;
  if (value < min || value > max) {
    LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
                 value, range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_cast<void>(range);
  return value;
}
56
57 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)58 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
59 int b, int angle, bool flip,
60 int8_t range) {
61 // Note that we multiply in 32 bits and then add/subtract the products in 64
62 // bits. The 32-bit multiplications do not overflow. Please see the comment
63 // and assert() in Cos128().
64 const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
65 static_cast<int64_t>(dst[b] * Sin128(angle));
66 const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
67 static_cast<int64_t>(dst[b] * Cos128(angle));
68 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
69 // values saved into the array T by this function are representable by a
70 // signed integer using |range| bits of precision.
71 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
72 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
73 }
74
75 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)76 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
77 int angle, bool flip, int8_t range) {
78 // Note that we multiply in 32 bits and then add/subtract the products in 64
79 // bits. The 32-bit multiplications do not overflow. Please see the comment
80 // and assert() in Cos128().
81 const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
82 const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
83 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
84 // values saved into the array T by this function are representable by a
85 // signed integer using |range| bits of precision.
86 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
87 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
88 }
89
90 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)91 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
92 int angle, bool flip, int8_t range) {
93 // Note that we multiply in 32 bits and then add/subtract the products in 64
94 // bits. The 32-bit multiplications do not overflow. Please see the comment
95 // and assert() in Cos128().
96 const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
97 const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
98
99 // Section 7.13.2.1: It is a requirement of bitstream conformance that the
100 // values saved into the array T by this function are representable by a
101 // signed integer using |range| bits of precision.
102 dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
103 dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
104 }
105
106 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)107 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
108 int8_t range) {
109 if (flip) std::swap(a, b);
110 --range;
111 // For Adst and Dct, the maximum possible value for range is 20. So min and
112 // max should always fit into int32_t.
113 const int32_t min = -(1 << range);
114 const int32_t max = (1 << range) - 1;
115 const int32_t x = dst[a] + dst[b];
116 const int32_t y = dst[a] - dst[b];
117 dst[a] = Clip3(x, min, max);
118 dst[b] = Clip3(y, min, max);
119 }
120
// Clips the first |size| entries of |dst| to the range of a signed integer
// with Max(bitdepth + 6, 16) bits. Does nothing when Residual is no wider than
// 16 bits.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // If Residual is int16_t (which implies bitdepth is 8), the values are
  // already confined to 16 bits, so no clipping is needed.
  if (sizeof(Residual) > 2) {
    const int clamp_bits = std::max(bitdepth + 6, 16);
    const Residual upper = (1 << (clamp_bits - 1)) - 1;
    const Residual lower = -upper - 1;
    for (int index = 0; index < size; ++index) {
      const Residual clipped = Clip3(dst[index], lower, upper);
      dst[index] = clipped;
    }
  }
}
134
135 //------------------------------------------------------------------------------
136 // Discrete Cosine Transforms (DCT).
137
// Bit-reversal permutation tables, one row per 1D transform size.
//
// The value at index (i, j) is obtained by reversing the bits of j and
// interpreting the result as an integer that is i + 2 bits long.
// For example, index (2, 3) is computed as follows:
//  * bitreverse(3) = bitreverse(..000011) = 110000...
//  * interpreting that as an integer with bit-length 2+2 = 4 gives 1100 = 12
//
// Every row has 64 entries. In rows narrower than 6 bits, the pattern of the
// first 1 << (i + 2) entries simply repeats to fill the row.
constexpr uint8_t kBitReverseLookup[kNum1DTransformSizes][64] = {
    {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
     1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
     0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
    {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
     3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
     1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
    {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
    {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
     0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
    {0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
     2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
     1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
     3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
162
// In-place 1D inverse DCT over the (1 << size_log2) values that |dest| points
// to, interpreted as an array of Residual. size_log2 must be in [2, 6], i.e.
// the transform has 4, 8, 16, 32 or 64 points.
//
// |range| is forwarded unchanged to every ButterflyRotation_C() and
// HadamardRotation_C() call, where it is the bit precision used for range
// checking and clipping respectively.
//
// The transform is a fixed sequence of stages that all operate in place on
// |dst|, so each stage consumes the output of the stages before it and the
// order of the stages must be preserved. A stage is guarded by a test on
// size_log2 so that it only runs when the transform is large enough to contain
// the indices it touches.
template <typename Residual, int size_log2>
void Dct_C(void* dest, int8_t range) {
  static_assert(size_log2 >= 2 && size_log2 <= 6, "");
  auto* const dst = static_cast<Residual*>(dest);
  // stage 1: reorder the input with the bit-reversal permutation for this
  // transform size. The input is copied to |temp| first because the
  // permutation cannot be applied while reading from and writing to |dst|.
  const int size = 1 << size_log2;
  Residual temp[size];
  memcpy(temp, dst, sizeof(temp));
  for (int i = 0; i < size; ++i) {
    dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
  }
  // Most of stages 2-31 are conditional on the value of size_log2.
  // stage 2.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      ButterflyRotation_C(dst, i + 32, 63 - i,
                          63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
                          range);
    }
  }
  // stage 3.
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, i + 16, 31 - i,
                          6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
                          range);
    }
  }
  // stage 4.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 5.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, i + 8, 15 - i,
                          12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
                          range);
    }
  }
  // stage 6.
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 7.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
            60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
            range);
      }
    }
  }
  // stage 8.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
    }
  }
  // stage 9.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 10.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
            24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
      }
    }
  }
  // stage 11.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
                           MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 12. Runs for every transform size.
  for (int i = 0; i < 2; ++i) {
    ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
                        i == 0, range);
  }
  // stage 13.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
                         /*flip=*/i != 0, range);
    }
  }
  // stage 14.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
    }
  }
  // stage 15.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
                           MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 16.
  if (size_log2 == 6) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        ButterflyRotation_C(
            dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
            56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
      }
    }
  }
  // stage 17. Runs for every transform size.
  for (int i = 0; i < 2; ++i) {
    HadamardRotation_C(dst, i, 3 - i, false, range);
  }
  // stage 18.
  if (size_log2 >= 3) {
    ButterflyRotation_C(dst, 6, 5, 32, true, range);
  }
  // stage 19.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
                           /*flip=*/i != 0, range);
      }
    }
  }
  // stage 20.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
                          range);
    }
  }
  // stage 21.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
                           MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 22.
  if (size_log2 >= 3) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, i, 7 - i, false, range);
    }
  }
  // stage 23.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
    }
  }
  // stage 24.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
                           MultiplyBy8(i) - j + 23, i == 1, range);
      }
    }
  }
  // stage 25.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
    }
  }
  // stage 26.
  if (size_log2 >= 4) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i, 15 - i, false, range);
    }
  }
  // stage 27.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
    }
  }
  // stage 28.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i + 32, 47 - i, false, range);
      HadamardRotation_C(dst, i + 48, 63 - i, true, range);
    }
  }
  // stage 29.
  if (size_log2 >= 5) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, i, 31 - i, false, range);
    }
  }
  // stage 30.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
    }
  }
  // stage 31.
  if (size_log2 == 6) {
    for (int i = 0; i < 32; ++i) {
      HadamardRotation_C(dst, i, 63 - i, false, range);
    }
  }
}
394
395 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)396 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
397 bool is_row) {
398 auto* const dst = static_cast<Residual*>(dest);
399
400 if (is_row && should_round) {
401 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
402 }
403
404 ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
405
406 if (is_row && row_shift > 0) {
407 dst[0] = RightShiftWithRounding(dst[0], row_shift);
408 }
409
410 ClampIntermediate<bitdepth, Residual>(dst, 1);
411
412 const int size = 1 << size_log2;
413 for (int i = 1; i < size; ++i) {
414 dst[i] = dst[0];
415 }
416 }
417
418 //------------------------------------------------------------------------------
419 // Asymmetric Discrete Sine Transforms (ADST).
420
421 /*
422 * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
423 * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
424 */
425 template <typename Residual>
Adst4_C(void * dest,int8_t range)426 void Adst4_C(void* dest, int8_t range) {
427 auto* const dst = static_cast<Residual*>(dest);
428 if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
429 return;
430 }
431
432 // stage 1.
433 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
434 // values stored in the s and x arrays by this process are representable by
435 // a signed integer using range + 12 bits of precision.
436 int32_t s[7];
437 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
438 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
439 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
440 s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
441 s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
442 s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
443 s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
444 // stage 2.
445 // Section 7.13.2.6: It is a requirement of bitstream conformance that
446 // values stored in the variable a7 by this process are representable by a
447 // signed integer using range + 1 bits of precision.
448 const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
449 // Section 7.13.2.6: It is a requirement of bitstream conformance that
450 // values stored in the variable b7 by this process are representable by a
451 // signed integer using |range| bits of precision.
452 const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
453 // stage 3.
454 s[0] = RangeCheckValue(s[0] + s[3], range + 12);
455 s[1] = RangeCheckValue(s[1] - s[4], range + 12);
456 s[3] = s[2];
457 s[2] = RangeCheckValue(kAdst4Multiplier[2] * b7, range + 12);
458 // stage 4.
459 s[0] = RangeCheckValue(s[0] + s[5], range + 12);
460 s[1] = RangeCheckValue(s[1] - s[6], range + 12);
461 // stages 5 and 6.
462 const int32_t x0 = RangeCheckValue(s[0] + s[3], range + 12);
463 const int32_t x1 = RangeCheckValue(s[1] + s[3], range + 12);
464 int32_t x3 = RangeCheckValue(s[0] + s[1], range + 12);
465 x3 = RangeCheckValue(x3 - s[3], range + 12);
466 int32_t dst_0 = RightShiftWithRounding(x0, 12);
467 int32_t dst_1 = RightShiftWithRounding(x1, 12);
468 int32_t dst_2 = RightShiftWithRounding(s[2], 12);
469 int32_t dst_3 = RightShiftWithRounding(x3, 12);
470 if (sizeof(Residual) == 2) {
471 // If the first argument to RightShiftWithRounding(..., 12) is only
472 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
473 // in RightShiftWithRounding(..., 12) will cause the function to return
474 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
475 dst_0 -= (dst_0 == 0x8000);
476 dst_1 -= (dst_1 == 0x8000);
477 dst_3 -= (dst_3 == 0x8000);
478 }
479 dst[0] = dst_0;
480 dst[1] = dst_1;
481 dst[2] = dst_2;
482 dst[3] = dst_3;
483 }
484
485 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)486 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
487 bool is_row) {
488 auto* const dst = static_cast<Residual*>(dest);
489
490 if (is_row && should_round) {
491 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
492 }
493
494 // stage 1.
495 // Section 7.13.2.6: It is a requirement of bitstream conformance that all
496 // values stored in the s and x arrays by this process are representable by
497 // a signed integer using range + 12 bits of precision.
498 int32_t s[3];
499 s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
500 s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
501 s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
502 // stage 3.
503 // stage 4.
504 // stages 5 and 6.
505 int32_t dst_0 = RightShiftWithRounding(s[0], 12);
506 int32_t dst_1 = RightShiftWithRounding(s[1], 12);
507 int32_t dst_2 = RightShiftWithRounding(s[2], 12);
508 int32_t dst_3 =
509 RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
510 if (sizeof(Residual) == 2) {
511 // If the first argument to RightShiftWithRounding(..., 12) is only
512 // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
513 // in RightShiftWithRounding(..., 12) will cause the function to return
514 // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
515 dst_0 -= (dst_0 == 0x8000);
516 dst_1 -= (dst_1 == 0x8000);
517 dst_3 -= (dst_3 == 0x8000);
518 }
519 dst[0] = dst_0;
520 dst[1] = dst_1;
521 dst[2] = dst_2;
522 dst[3] = dst_3;
523
524 const int size = 4;
525 if (is_row && row_shift > 0) {
526 for (int j = 0; j < size; ++j) {
527 dst[j] = RightShiftWithRounding(dst[j], row_shift);
528 }
529 }
530
531 ClampIntermediate<bitdepth, Residual>(dst, 4);
532 }
533
// Copies the |n| values of |src| into |dst| in the input order used by the
// 8- and 16-point ADSTs: even positions of |dst| take the values of |src|
// counted back from its end (src[n - 1], src[n - 3], ...), odd positions take
// the values counted up from its start (src[0], src[2], ...).
// |n| must be 8 or 16.
template <typename Residual>
void AdstInputPermutation(int32_t* const dst, const Residual* const src,
                          int n) {
  assert(n == 8 || n == 16);
  for (int even = 0; even < n; even += 2) {
    dst[even] = src[n - 1 - even];
  }
  for (int odd = 1; odd < n; odd += 2) {
    dst[odd] = src[odd - 1];
  }
}
542
// Source index of each output of the 16-point ADST. The 8-point ADST uses the
// first eight entries with each value halved (shifted right by one).
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};

// Writes the |n| ADST results in |src| to |dst| in output order: dst[i] takes
// the value at the source index given by kAdstOutputPermutationLookup, negated
// when i is odd. |n| must be 8 or 16.
template <typename Residual>
void AdstOutputPermutation(Residual* const dst, const int32_t* const src,
                           int n) {
  assert(n == 8 || n == 16);
  const int index_shift = (n == 8) ? 1 : 0;
  for (int i = 0; i < n; ++i) {
    const int source_index = kAdstOutputPermutationLookup[i] >> index_shift;
    const bool negate = (i & 1) != 0;
    int32_t value = negate ? -src[source_index] : src[source_index];
    // Negating -32768 yields 32768, which cannot be represented as an
    // int16_t, so it is replaced by 32767 when Residual is 16 bits wide.
    if (sizeof(Residual) == 2 && value == 0x8000) {
      value = 0x7fff;
    }
    dst[i] = value;
  }
}
562
563 template <typename Residual>
Adst8_C(void * dest,int8_t range)564 void Adst8_C(void* dest, int8_t range) {
565 auto* const dst = static_cast<Residual*>(dest);
566 // stage 1.
567 int32_t temp[8];
568 AdstInputPermutation(temp, dst, 8);
569 // stage 2.
570 for (int i = 0; i < 4; ++i) {
571 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
572 true, range);
573 }
574 // stage 3.
575 for (int i = 0; i < 4; ++i) {
576 HadamardRotation_C(temp, i, i + 4, false, range);
577 }
578 // stage 4.
579 for (int i = 0; i < 2; ++i) {
580 ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
581 }
582 // stage 5.
583 for (int i = 0; i < 2; ++i) {
584 for (int j = 0; j < 2; ++j) {
585 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
586 false, range);
587 }
588 }
589 // stage 6.
590 for (int i = 0; i < 2; ++i) {
591 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
592 range);
593 }
594 // stage 7.
595 AdstOutputPermutation(dst, temp, 8);
596 }
597
598 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)599 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
600 bool is_row) {
601 auto* const dst = static_cast<Residual*>(dest);
602
603 // stage 1.
604 int32_t temp[8];
605 // After the permutation, the dc value is in temp[1]. The remaining are zero.
606 AdstInputPermutation(temp, dst, 8);
607
608 if (is_row && should_round) {
609 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
610 }
611
612 // stage 2.
613 ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
614
615 // stage 3.
616 temp[4] = temp[0];
617 temp[5] = temp[1];
618
619 // stage 4.
620 ButterflyRotation_C(temp, 4, 5, 48, true, range);
621
622 // stage 5.
623 temp[2] = temp[0];
624 temp[3] = temp[1];
625 temp[6] = temp[4];
626 temp[7] = temp[5];
627
628 // stage 6.
629 ButterflyRotation_C(temp, 2, 3, 32, true, range);
630 ButterflyRotation_C(temp, 6, 7, 32, true, range);
631
632 // stage 7.
633 AdstOutputPermutation(dst, temp, 8);
634
635 const int size = 8;
636 if (is_row && row_shift > 0) {
637 for (int j = 0; j < size; ++j) {
638 dst[j] = RightShiftWithRounding(dst[j], row_shift);
639 }
640 }
641
642 ClampIntermediate<bitdepth, Residual>(dst, 8);
643 }
644
645 template <typename Residual>
Adst16_C(void * dest,int8_t range)646 void Adst16_C(void* dest, int8_t range) {
647 auto* const dst = static_cast<Residual*>(dest);
648 // stage 1.
649 int32_t temp[16];
650 AdstInputPermutation(temp, dst, 16);
651 // stage 2.
652 for (int i = 0; i < 8; ++i) {
653 ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
654 true, range);
655 }
656 // stage 3.
657 for (int i = 0; i < 8; ++i) {
658 HadamardRotation_C(temp, i, i + 8, false, range);
659 }
660 // stage 4.
661 for (int i = 0; i < 2; ++i) {
662 ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
663 56 - 32 * i, true, range);
664 ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
665 8 + 32 * i, true, range);
666 }
667 // stage 5.
668 for (int i = 0; i < 4; ++i) {
669 for (int j = 0; j < 2; ++j) {
670 HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
671 false, range);
672 }
673 }
674 // stage 6.
675 for (int i = 0; i < 2; ++i) {
676 for (int j = 0; j < 2; ++j) {
677 ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
678 i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
679 }
680 }
681 // stage 7.
682 for (int i = 0; i < 2; ++i) {
683 for (int j = 0; j < 4; ++j) {
684 HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
685 false, range);
686 }
687 }
688 // stage 8.
689 for (int i = 0; i < 4; ++i) {
690 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
691 range);
692 }
693 // stage 9.
694 AdstOutputPermutation(dst, temp, 16);
695 }
696
697 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)698 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
699 bool is_row) {
700 auto* const dst = static_cast<Residual*>(dest);
701
702 // stage 1.
703 int32_t temp[16];
704 // After the permutation, the dc value is in temp[1]. The remaining are zero.
705 AdstInputPermutation(temp, dst, 16);
706
707 if (is_row && should_round) {
708 temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
709 }
710
711 // stage 2.
712 ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
713
714 // stage 3.
715 temp[8] = temp[0];
716 temp[9] = temp[1];
717
718 // stage 4.
719 ButterflyRotation_C(temp, 8, 9, 56, true, range);
720
721 // stage 5.
722 temp[4] = temp[0];
723 temp[5] = temp[1];
724 temp[12] = temp[8];
725 temp[13] = temp[9];
726
727 // stage 6.
728 ButterflyRotation_C(temp, 4, 5, 48, true, range);
729 ButterflyRotation_C(temp, 12, 13, 48, true, range);
730
731 // stage 7.
732 temp[2] = temp[0];
733 temp[3] = temp[1];
734 temp[10] = temp[8];
735 temp[11] = temp[9];
736
737 temp[6] = temp[4];
738 temp[7] = temp[5];
739 temp[14] = temp[12];
740 temp[15] = temp[13];
741
742 // stage 8.
743 for (int i = 0; i < 4; ++i) {
744 ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
745 range);
746 }
747
748 // stage 9.
749 AdstOutputPermutation(dst, temp, 16);
750
751 const int size = 16;
752 if (is_row && row_shift > 0) {
753 for (int j = 0; j < size; ++j) {
754 dst[j] = RightShiftWithRounding(dst[j], row_shift);
755 }
756 }
757
758 ClampIntermediate<bitdepth, Residual>(dst, 16);
759 }
760
761 //------------------------------------------------------------------------------
762 // Identity Transforms.
763 //
764 // In the spec, the inverse identity transform is followed by a Round2() call:
765 // The row transforms with i = 0..(h-1) are applied as follows:
766 // ...
767 // * Otherwise, invoke the inverse identity transform process specified in
768 // section 7.13.2.15 with the input variable n equal to log2W.
769 // * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
770 // for j = 0..(w-1).
771 // ...
772 // The column transforms with j = 0..(w-1) are applied as follows:
773 // ...
774 // * Otherwise, invoke the inverse identity transform process specified in
775 // section 7.13.2.15 with the input variable n equal to log2H.
776 // * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
777 // for i = 0..(h-1).
778 //
779 // Therefore, we define the identity transform functions to perform both the
780 // inverse identity transform and the Round2() call. This has two advantages:
781 // 1. The outputs of the inverse identity transform do not need to be stored
782 // in the Residual array. They can be stored in int32_t local variables,
783 // which have a larger range if Residual is an int16_t array.
784 // 2. The inverse identity transform and the Round2() call can be jointly
785 // optimized.
786 //
787 // The identity transform functions have the following prototype:
788 // void Identity_C(void* dest, int8_t shift);
789 //
790 // The |shift| parameter is the amount of shift for the Round2() call. For row
791 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
792 // 4. Therefore, an identity transform function can detect whether it is being
793 // invoked as a row transform or a column transform by checking whether |shift|
794 // is equal to 4.
795 //
796 // Input Range
797 //
798 // The inputs of row transforms, stored in the 2D array Dequant, are
799 // representable by a signed integer using 8 + BitDepth bits of precision:
800 // f. Dequant[ i ][ j ] is set equal to
801 // Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
802 //
803 // The inputs of column transforms are representable by a signed integer using
804 // Max( BitDepth + 6, 16 ) bits of precision:
805 // Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
806 // ...
807 // Between the row and column transforms, Residual[ i ][ j ] is set equal to
808 // Clip3( - ( 1 << ( colClampRange - 1 ) ),
809 // ( 1 << (colClampRange - 1 ) ) - 1,
810 // Residual[ i ][ j ] )
811 // for i = 0..(h-1), for j = 0..(w-1).
812 //
813 // Output Range
814 //
815 // The outputs of row transforms are representable by a signed integer using
816 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
817 // of the multiplicative factor of inverse identity transforms minus the
818 // smallest row shift is an increase of at most one bit.
819 //
820 // Transform | Multiplicative factor | Smallest row | Net increase
821 // width | (in bits) | shift | in bits
822 // ---------------------------------------------------------------
823 // 4 | sqrt(2) (0.5 bits) | 0 | +0.5
824 // 8 | 2 (1 bit) | 0 | +1
825 // 16 | 2*sqrt(2) (1.5 bits) | 1 | +0.5
826 // 32 | 4 (2 bits) | 1 | +1
827 //
828 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
829 // clip the outputs (which have 17 bits of precision) to the range of int16_t
830 // before storing them in the Residual array. This clipping happens to be the
831 // same as the required clipping after the row transform (see the spec quoted
832 // above), so we remain compliant with the spec. (In this case,
833 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
834 // duplication of effort.)
835 //
836 // The outputs of column transforms are representable by a signed integer using
837 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
838 // because the multiplicative factor of inverse identity transforms is at most
839 // 4 (2 bits) and |shift| is always 4.
840
841 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)842 void Identity4Row_C(void* dest, int8_t shift) {
843 assert(shift == 0 || shift == 1);
844 auto* const dst = static_cast<Residual*>(dest);
845 // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
846 // should be (1 + (1 << 1)) << 11. The following expression works for both
847 // values of |shift|.
848 const int32_t rounding = (1 + (shift << 1)) << 11;
849 for (int i = 0; i < 4; ++i) {
850 // The intermediate value here will have to fit into an int32_t for it to be
851 // bitstream conformant. The multiplication is promoted to int32_t by
852 // defining kIdentity4Multiplier as int32_t.
853 int32_t dst_i = (dst[i] * kIdentity4Multiplier + rounding) >> (12 + shift);
854 if (sizeof(Residual) == 2) {
855 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
856 }
857 dst[i] = static_cast<Residual>(dst_i);
858 }
859 }
860
861 template <typename Residual>
Identity4Column_C(void * dest,int8_t)862 void Identity4Column_C(void* dest, int8_t /*shift*/) {
863 auto* const dst = static_cast<Residual*>(dest);
864 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
865 for (int i = 0; i < 4; ++i) {
866 // The intermediate value here will have to fit into an int32_t for it to be
867 // bitstream conformant. The multiplication is promoted to int32_t by
868 // defining kIdentity4Multiplier as int32_t.
869 dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
870 (12 + kTransformColumnShift));
871 }
872 }
873
874 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)875 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
876 int row_shift, bool is_row) {
877 auto* const dst = static_cast<Residual*>(dest);
878
879 if (is_row) {
880 if (should_round) {
881 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
882 }
883
884 const int32_t rounding = (1 + (row_shift << 1)) << 11;
885 int32_t dst_i =
886 (dst[0] * kIdentity4Multiplier + rounding) >> (12 + row_shift);
887 if (sizeof(Residual) == 2) {
888 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
889 }
890 dst[0] = static_cast<Residual>(dst_i);
891
892 ClampIntermediate<bitdepth, Residual>(dst, 1);
893 return;
894 }
895
896 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
897 dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
898 (12 + kTransformColumnShift));
899 }
900
901 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)902 void Identity8Row_C(void* dest, int8_t shift) {
903 assert(shift == 0 || shift == 1 || shift == 2);
904 auto* const dst = static_cast<Residual*>(dest);
905 for (int i = 0; i < 8; ++i) {
906 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
907 if (sizeof(Residual) == 2) {
908 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
909 }
910 dst[i] = static_cast<Residual>(dst_i);
911 }
912 }
913
914 template <typename Residual>
Identity8Column_C(void * dest,int8_t)915 void Identity8Column_C(void* dest, int8_t /*shift*/) {
916 auto* const dst = static_cast<Residual*>(dest);
917 for (int i = 0; i < 8; ++i) {
918 dst[i] = static_cast<Residual>(
919 RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
920 }
921 }
922
923 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)924 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
925 int row_shift, bool is_row) {
926 auto* const dst = static_cast<Residual*>(dest);
927
928 if (is_row) {
929 if (should_round) {
930 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
931 }
932
933 int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
934 if (sizeof(Residual) == 2) {
935 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
936 }
937 dst[0] = static_cast<Residual>(dst_i);
938
939 // If Residual is int16_t (which implies bitdepth is 8), we don't need to
940 // clip residual[i][j] to 16 bits.
941 if (sizeof(Residual) > 2) {
942 const Residual intermediate_clamp_max =
943 (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
944 const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
945 dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
946 }
947 return;
948 }
949
950 dst[0] = static_cast<Residual>(
951 RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
952 }
953
954 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)955 void Identity16Row_C(void* dest, int8_t shift) {
956 assert(shift == 1 || shift == 2);
957 auto* const dst = static_cast<Residual*>(dest);
958 const int32_t rounding = (1 + (1 << shift)) << 11;
959 for (int i = 0; i < 16; ++i) {
960 // The intermediate value here will have to fit into an int32_t for it to be
961 // bitstream conformant. The multiplication is promoted to int32_t by
962 // defining kIdentity16Multiplier as int32_t.
963 int32_t dst_i = (dst[i] * kIdentity16Multiplier + rounding) >> (12 + shift);
964 if (sizeof(Residual) == 2) {
965 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
966 }
967 dst[i] = static_cast<Residual>(dst_i);
968 }
969 }
970
971 template <typename Residual>
Identity16Column_C(void * dest,int8_t)972 void Identity16Column_C(void* dest, int8_t /*shift*/) {
973 auto* const dst = static_cast<Residual*>(dest);
974 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
975 for (int i = 0; i < 16; ++i) {
976 // The intermediate value here will have to fit into an int32_t for it to be
977 // bitstream conformant. The multiplication is promoted to int32_t by
978 // defining kIdentity16Multiplier as int32_t.
979 dst[i] =
980 static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
981 (12 + kTransformColumnShift));
982 }
983 }
984
985 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)986 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
987 int row_shift, bool is_row) {
988 auto* const dst = static_cast<Residual*>(dest);
989
990 if (is_row) {
991 if (should_round) {
992 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
993 }
994
995 const int32_t rounding = (1 + (1 << row_shift)) << 11;
996 int32_t dst_i =
997 (dst[0] * kIdentity16Multiplier + rounding) >> (12 + row_shift);
998 if (sizeof(Residual) == 2) {
999 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1000 }
1001 dst[0] = static_cast<Residual>(dst_i);
1002
1003 ClampIntermediate<bitdepth, Residual>(dst, 1);
1004 return;
1005 }
1006
1007 const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1008 dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1009 (12 + kTransformColumnShift));
1010 }
1011
1012 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1013 void Identity32Row_C(void* dest, int8_t shift) {
1014 assert(shift == 1 || shift == 2);
1015 auto* const dst = static_cast<Residual*>(dest);
1016 for (int i = 0; i < 32; ++i) {
1017 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1018 if (sizeof(Residual) == 2) {
1019 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1020 }
1021 dst[i] = static_cast<Residual>(dst_i);
1022 }
1023 }
1024
1025 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1026 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1027 auto* const dst = static_cast<Residual*>(dest);
1028 for (int i = 0; i < 32; ++i) {
1029 dst[i] = static_cast<Residual>(
1030 RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1031 }
1032 }
1033
1034 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1035 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1036 int row_shift, bool is_row) {
1037 auto* const dst = static_cast<Residual*>(dest);
1038
1039 if (is_row) {
1040 if (should_round) {
1041 dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
1042 }
1043
1044 int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1045 if (sizeof(Residual) == 2) {
1046 dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1047 }
1048 dst[0] = static_cast<Residual>(dst_i);
1049
1050 ClampIntermediate<bitdepth, Residual>(dst, 1);
1051 return;
1052 }
1053
1054 dst[0] = static_cast<Residual>(
1055 RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1056 }
1057
1058 //------------------------------------------------------------------------------
1059 // Walsh Hadamard Transform.
1060
// 4-point inverse Walsh-Hadamard transform, applied in place to the Residual
// array at |dest|. Every input is first right shifted by |shift|. Note that
// the inputs are consumed in a permuted order: dest[0], dest[1], dest[2] and
// dest[3] feed |a|, |c|, |d| and |b| respectively.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  auto* const coeffs = static_cast<Residual*>(dest);
  Residual a = coeffs[0] >> shift;
  Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  Residual b = coeffs[3] >> shift;
  a += c;
  d -= b;
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;
  const Residual out1 = e - b;
  const Residual out2 = e - c;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[0] = a - out1;
  coeffs[3] = d + out2;
}
1078
1079 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1080 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1081 int /*row_shift*/, bool /*is_row*/) {
1082 auto* const dst = static_cast<Residual*>(dest);
1083 const int shift = range;
1084
1085 Residual temp = dst[0] >> shift;
1086 // This signed right shift must be an arithmetic shift.
1087 Residual e = temp >> 1;
1088 dst[0] = temp - e;
1089 dst[1] = e;
1090 dst[2] = e;
1091 dst[3] = e;
1092
1093 ClampIntermediate<bitdepth, Residual>(dst, 4);
1094 }
1095
1096 //------------------------------------------------------------------------------
1097 // row/column transform loop
1098
// Signature of a full 1D inverse transform that works in place on |dest|.
// TransformLoop_C() passes the row/column shift as |range| for identity
// transforms and the clamping range (or the WHT shift) otherwise.
using InverseTransform1DFunc = void (*)(void* dest, int8_t range);

// Signature of the DC-only 1D inverse transform. TransformLoop_C() calls it
// instead of the full transform when |adjusted_tx_height| is 1.
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
                                            bool should_round, int row_shift,
                                            bool is_row);
1103
1104 template <int bitdepth, typename Residual, typename Pixel,
1105 Transform1D transform1d_type,
1106 InverseTransformDcOnlyFunc dconly_transform1d,
1107 InverseTransform1DFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * src_buffer,int start_x,int start_y,void * dst_frame)1108 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1109 int adjusted_tx_height, void* src_buffer, int start_x,
1110 int start_y, void* dst_frame) {
1111 constexpr bool lossless = transform1d_type == k1DTransformWht;
1112 constexpr bool is_identity = transform1d_type == k1DTransformIdentity;
1113 // The transform size of the WHT is always 4x4. Setting tx_width and
1114 // tx_height to the constant 4 for the WHT speeds the code up.
1115 assert(!lossless || tx_size == kTransformSize4x4);
1116 const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1117 const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1118 const int tx_width_log2 = kTransformWidthLog2[tx_size];
1119 const int tx_height_log2 = kTransformHeightLog2[tx_size];
1120 auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1121
1122 // Initially this points to the dequantized values. After the transforms are
1123 // applied, this buffer contains the residual.
1124 Array2DView<Residual> residual(tx_height, tx_width,
1125 static_cast<Residual*>(src_buffer));
1126
1127 if (is_row) {
1128 // Row transform.
1129 const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1130 // This is the |range| parameter of the InverseTransform1DFunc. For lossy
1131 // transforms, this will be equal to the clamping range.
1132 const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1133 // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1134 // the input to the row transform by 1 / sqrt(2), which is approximated by
1135 // the fraction 2896 / 2^12.
1136 const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1137
1138 if (adjusted_tx_height == 1) {
1139 dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1140 true);
1141 return;
1142 }
1143
1144 // Row transforms need to be done only up to 32 because the rest of the rows
1145 // are always all zero if |tx_height| is 64. Otherwise, only process the
1146 // rows that have a non zero coefficients.
1147 for (int i = 0; i < adjusted_tx_height; ++i) {
1148 // If lossless, the transform size is 4x4, so should_round is false.
1149 if (!lossless && should_round) {
1150 // The last 32 values of every row are always zero if the |tx_width| is
1151 // 64.
1152 for (int j = 0; j < std::min(tx_width, 32); ++j) {
1153 residual[i][j] = RightShiftWithRounding(
1154 residual[i][j] * kTransformRowMultiplier, 12);
1155 }
1156 }
1157 // For identity transform, |transform1d_func| also performs the
1158 // Round2(T[j], rowShift) call in the spec.
1159 transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1160 if (!lossless && !is_identity && row_shift > 0) {
1161 for (int j = 0; j < tx_width; ++j) {
1162 residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1163 }
1164 }
1165
1166 ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1167 }
1168 return;
1169 }
1170
1171 assert(!is_row);
1172 constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1173 // This is the |range| parameter of the InverseTransform1DFunc. For lossy
1174 // transforms, this will be equal to the clamping range.
1175 const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1176 const bool flip_rows = transform1d_type == k1DTransformAdst &&
1177 kTransformFlipRowsMask.Contains(tx_type);
1178 const bool flip_columns =
1179 !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1180 const int min_value = 0;
1181 const int max_value = (1 << bitdepth) - 1;
1182 // Note: 64 is the maximum size of a 1D transform buffer (the largest
1183 // transform size is kTransformSize64x64).
1184 Residual tx_buffer[64];
1185 for (int j = 0; j < tx_width; ++j) {
1186 const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1187 int i = 0;
1188 do {
1189 tx_buffer[i] = residual[i][flipped_j];
1190 } while (++i != tx_height);
1191 if (adjusted_tx_height == 1) {
1192 dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1193 } else {
1194 // For identity transform, |transform1d_func| also performs the
1195 // Round2(T[i], colShift) call in the spec.
1196 transform1d_func(tx_buffer,
1197 is_identity ? column_shift : column_clamp_range);
1198 }
1199 const int x = start_x + j;
1200 for (int i = 0; i < tx_height; ++i) {
1201 const int y = start_y + i;
1202 const int index = flip_rows ? tx_height - i - 1 : i;
1203 Residual residual_value = tx_buffer[index];
1204 if (!lossless && !is_identity) {
1205 residual_value = RightShiftWithRounding(residual_value, column_shift);
1206 }
1207 (*frame)[y][x] =
1208 Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1209 }
1210 }
1211 }
1212
1213 //------------------------------------------------------------------------------
1214
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// Fills the row and column entries of |dsp->inverse_transforms| for every
// 1D transform type and size listed below with TransformLoop_C
// instantiations for the given |bitdepth|, Residual and Pixel types.
template <int bitdepth, typename Residual, typename Pixel>
void InitAll(Dsp* const dsp) {
  // Maximum transform size for Dct is 64. The last template argument of
  // DctDcOnly_C and Dct_C runs from 2 (size 4) up to 6 (size 64).
  auto& dct = dsp->inverse_transforms[k1DTransformDct];
  dct[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/true>;
  dct[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/false>;
  dct[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/true>;
  dct[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/false>;
  dct[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/true>;
  dct[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/false>;
  dct[k1DTransformSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/true>;
  dct[k1DTransformSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/false>;
  dct[k1DTransformSize64][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/true>;
  dct[k1DTransformSize64][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/false>;

  // Maximum transform size for Adst is 16.
  auto& adst = dsp->inverse_transforms[k1DTransformAdst];
  adst[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/false>;
  adst[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/false>;
  adst[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/false>;

  // Maximum transform size for Identity transform is 32. The row and column
  // passes use different 1D functions (Identity*Row_C / Identity*Column_C).
  auto& identity = dsp->inverse_transforms[k1DTransformIdentity];
  identity[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Column_C<Residual>, /*is_row=*/false>;

  // Maximum transform size for Wht is 4.
  auto& wht = dsp->inverse_transforms[k1DTransformWht];
  wht[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/true>;
  wht[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/false>;
}
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1331
Init8bpp()1332 void Init8bpp() {
1333 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1334 assert(dsp != nullptr);
1335 for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
1336 for (auto& inverse_transform : inverse_transform_by_size) {
1337 inverse_transform[kRow] = nullptr;
1338 inverse_transform[kColumn] = nullptr;
1339 }
1340 }
1341 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1342 InitAll<8, int16_t, uint8_t>(dsp);
1343 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1344 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct
1345 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
1346 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1347 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1348 /*is_row=*/true>;
1349 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
1350 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1351 DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1352 /*is_row=*/false>;
1353 #endif
1354 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct
1355 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
1356 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1357 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1358 /*is_row=*/true>;
1359 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
1360 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1361 DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1362 /*is_row=*/false>;
1363 #endif
1364 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct
1365 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
1366 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1367 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1368 /*is_row=*/true>;
1369 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
1370 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1371 DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1372 /*is_row=*/false>;
1373 #endif
1374 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct
1375 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
1376 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1377 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1378 /*is_row=*/true>;
1379 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
1380 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1381 DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1382 /*is_row=*/false>;
1383 #endif
1384 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct
1385 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
1386 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1387 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1388 /*is_row=*/true>;
1389 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
1390 TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1391 DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1392 /*is_row=*/false>;
1393 #endif
1394 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst
1395 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
1396 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1397 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1398 /*is_row=*/true>;
1399 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
1400 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1401 Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1402 /*is_row=*/false>;
1403 #endif
1404 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst
1405 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
1406 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1407 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1408 /*is_row=*/true>;
1409 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
1410 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1411 Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1412 /*is_row=*/false>;
1413 #endif
1414 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst
1415 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
1416 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1417 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1418 /*is_row=*/true>;
1419 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
1420 TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1421 Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1422 /*is_row=*/false>;
1423 #endif
1424 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity
1425 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
1426 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1427 Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1428 /*is_row=*/true>;
1429 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
1430 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1431 Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1432 /*is_row=*/false>;
1433 #endif
1434 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity
1435 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
1436 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1437 Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1438 /*is_row=*/true>;
1439 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
1440 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1441 Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1442 /*is_row=*/false>;
1443 #endif
1444 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity
1445 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
1446 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1447 Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1448 /*is_row=*/true>;
1449 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
1450 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1451 Identity16DcOnly_C<8, int16_t>,
1452 Identity16Column_C<int16_t>, /*is_row=*/false>;
1453 #endif
1454 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity
1455 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
1456 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1457 Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1458 /*is_row=*/true>;
1459 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
1460 TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1461 Identity32DcOnly_C<8, int16_t>,
1462 Identity32Column_C<int16_t>, /*is_row=*/false>;
1463 #endif
1464 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht
1465 dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
1466 TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
1467 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1468 /*is_row=*/true>;
1469 dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
1470 TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
1471 Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1472 /*is_row=*/false>;
1473 #endif
1474 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1475 }
1476
1477 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1478 void Init10bpp() {
1479 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1480 assert(dsp != nullptr);
1481 for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
1482 for (auto& inverse_transform : inverse_transform_by_size) {
1483 inverse_transform[kRow] = nullptr;
1484 inverse_transform[kColumn] = nullptr;
1485 }
1486 }
1487 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1488 InitAll<10, int32_t, uint16_t>(dsp);
1489 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1490 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformDct
1491 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
1492 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1493 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1494 /*is_row=*/true>;
1495 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
1496 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1497 DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1498 /*is_row=*/false>;
1499 #endif
1500 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformDct
1501 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
1502 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1503 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1504 /*is_row=*/true>;
1505 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
1506 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1507 DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1508 /*is_row=*/false>;
1509 #endif
1510 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformDct
1511 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
1512 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1513 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1514 /*is_row=*/true>;
1515 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
1516 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1517 DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1518 /*is_row=*/false>;
1519 #endif
1520 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformDct
1521 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
1522 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1523 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1524 /*is_row=*/true>;
1525 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
1526 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1527 DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1528 /*is_row=*/false>;
1529 #endif
1530 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize64_1DTransformDct
1531 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
1532 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1533 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1534 /*is_row=*/true>;
1535 dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
1536 TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1537 DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1538 /*is_row=*/false>;
1539 #endif
1540 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformAdst
1541 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
1542 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1543 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1544 /*is_row=*/true>;
1545 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
1546 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1547 Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1548 /*is_row=*/false>;
1549 #endif
1550 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformAdst
1551 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
1552 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1553 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1554 /*is_row=*/true>;
1555 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
1556 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1557 Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1558 /*is_row=*/false>;
1559 #endif
1560 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformAdst
1561 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
1562 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1563 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1564 /*is_row=*/true>;
1565 dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
1566 TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1567 Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1568 /*is_row=*/false>;
1569 #endif
1570 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformIdentity
1571 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
1572 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1573 Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1574 /*is_row=*/true>;
1575 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
1576 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1577 Identity4DcOnly_C<10, int32_t>,
1578 Identity4Column_C<int32_t>, /*is_row=*/false>;
1579 #endif
1580 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformIdentity
1581 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
1582 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1583 Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1584 /*is_row=*/true>;
1585 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
1586 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1587 Identity8DcOnly_C<10, int32_t>,
1588 Identity8Column_C<int32_t>, /*is_row=*/false>;
1589 #endif
1590 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformIdentity
1591 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
1592 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1593 Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1594 /*is_row=*/true>;
1595 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
1596 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1597 Identity16DcOnly_C<10, int32_t>,
1598 Identity16Column_C<int32_t>, /*is_row=*/false>;
1599 #endif
1600 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformIdentity
1601 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
1602 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1603 Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1604 /*is_row=*/true>;
1605 dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
1606 TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1607 Identity32DcOnly_C<10, int32_t>,
1608 Identity32Column_C<int32_t>, /*is_row=*/false>;
1609 #endif
1610 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformWht
1611 dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
1612 TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
1613 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1614 /*is_row=*/true>;
1615 dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
1616 TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
1617 Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1618 /*is_row=*/false>;
1619 #endif
1620 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1621 }
1622 #endif // LIBGAV1_MAX_BITDEPTH >= 10
1623
1624 } // namespace
1625
InverseTransformInit_C()1626 void InverseTransformInit_C() {
1627 Init8bpp();
1628 #if LIBGAV1_MAX_BITDEPTH >= 10
1629 Init10bpp();
1630 #endif
1631
1632 // Local functions that may be unused depending on the optimizations
1633 // available.
1634 static_cast<void>(RangeCheckValue);
1635 static_cast<void>(kBitReverseLookup);
1636 }
1637
1638 } // namespace dsp
1639 } // namespace libgav1
1640