• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/inverse_transform.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21 
22 #include "src/dsp/dsp.h"
23 #include "src/utils/array_2d.h"
24 #include "src/utils/common.h"
25 #include "src/utils/compiler_attributes.h"
26 #include "src/utils/logging.h"
27 
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31 
32 // Include the constants and utility functions inside the anonymous namespace.
33 #include "src/dsp/inverse_transform.inc"
34 
// Shift amount associated with column transforms. The identity transform
// notes later in this file state that the Round2() shift for column
// transforms is always 4.
constexpr uint8_t kTransformColumnShift{4};

// Fuzzing builds drop the range-check macro so that RangeCheckValue() below
// compiles down to a pass-through.
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
#endif
40 
// Returns |value| unchanged.
//
// When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is defined to a nonzero value,
// additionally verifies that |value| is representable as a signed integer of
// |range| bits, i.e. that it lies in [-2^(range-1), 2^(range-1) - 1]. A
// violation is logged and then trips an assert(). In all other builds
// |range| is unused.
int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  assert(range <= 32);
  // The bounds are built in unsigned arithmetic so that range == 32 does not
  // overflow a signed shift.
  const uint32_t sign_bit = uint32_t{1} << (range - 1);
  const auto min = static_cast<int32_t>(-sign_bit);
  const auto max = static_cast<int32_t>(sign_bit - 1);
  const bool representable = min <= value && value <= max;
  if (!representable) {
    LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
                 value, range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  static_cast<void>(range);
  return value;
}
56 
57 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)58 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
59                                                int b, int angle, bool flip,
60                                                int8_t range) {
61   // Note that we multiply in 32 bits and then add/subtract the products in 64
62   // bits. The 32-bit multiplications do not overflow. Please see the comment
63   // and assert() in Cos128().
64   const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
65                     static_cast<int64_t>(dst[b] * Sin128(angle));
66   const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
67                     static_cast<int64_t>(dst[b] * Cos128(angle));
68   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
69   // values saved into the array T by this function are representable by a
70   // signed integer using |range| bits of precision.
71   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
72   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
73 }
74 
75 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)76 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
77                                     int angle, bool flip, int8_t range) {
78   // Note that we multiply in 32 bits and then add/subtract the products in 64
79   // bits. The 32-bit multiplications do not overflow. Please see the comment
80   // and assert() in Cos128().
81   const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
82   const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
83   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
84   // values saved into the array T by this function are representable by a
85   // signed integer using |range| bits of precision.
86   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
87   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
88 }
89 
90 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)91 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
92                                      int angle, bool flip, int8_t range) {
93   // Note that we multiply in 32 bits and then add/subtract the products in 64
94   // bits. The 32-bit multiplications do not overflow. Please see the comment
95   // and assert() in Cos128().
96   const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
97   const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
98 
99   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
100   // values saved into the array T by this function are representable by a
101   // signed integer using |range| bits of precision.
102   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
103   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
104 }
105 
106 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)107 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
108                         int8_t range) {
109   if (flip) std::swap(a, b);
110   --range;
111   // For Adst and Dct, the maximum possible value for range is 20. So min and
112   // max should always fit into int32_t.
113   const int32_t min = -(1 << range);
114   const int32_t max = (1 << range) - 1;
115   const int32_t x = dst[a] + dst[b];
116   const int32_t y = dst[a] - dst[b];
117   dst[a] = Clip3(x, min, max);
118   dst[b] = Clip3(y, min, max);
119 }
120 
// Clips the first |size| entries of |dst| to a signed integer of
// Max(bitdepth + 6, 16) bits. Does nothing when Residual is at most two
// bytes wide.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // If Residual is int16_t (which implies bitdepth is 8), the values are
  // already limited to 16 bits, so no clipping is needed.
  if (sizeof(Residual) <= 2) return;

  const Residual upper = (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual lower = -upper - 1;
  for (int i = 0; i < size; ++i) {
    dst[i] = Clip3(dst[i], lower, upper);
  }
}
134 
135 //------------------------------------------------------------------------------
136 // Discrete Cosine Transforms (DCT).
137 
138 // Value for index (i, j) is computed as bitreverse(j) and interpreting that as
139 // an integer with bit-length i + 2.
140 // For e.g. index (2, 3) will be computed as follows:
141 //   * bitreverse(3) = bitreverse(..000011) = 110000...
142 //   * interpreting that as an integer with bit-length 2+2 = 4 will be 1100 = 12
143 constexpr uint8_t kBitReverseLookup[kNumTransform1dSizes][64] = {
144     {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
145      1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
146      0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
147     {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
148      3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
149      1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
150     {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
151      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
152      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
153      0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
154     {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
155      1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
156      0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
157      1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
158     {0, 32, 16, 48, 8,  40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
159      2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
160      1, 33, 17, 49, 9,  41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
161      3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
162 
163 template <typename Residual, int size_log2>
Dct_C(void * dest,int8_t range)164 void Dct_C(void* dest, int8_t range) {
165   static_assert(size_log2 >= 2 && size_log2 <= 6, "");
166   auto* const dst = static_cast<Residual*>(dest);
167   // stage 1.
168   const int size = 1 << size_log2;
169   Residual temp[size];
170   memcpy(temp, dst, sizeof(temp));
171   for (int i = 0; i < size; ++i) {
172     dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
173   }
174   // stages 2-32 are dependent on the value of size_log2.
175   // stage 2.
176   if (size_log2 == 6) {
177     for (int i = 0; i < 16; ++i) {
178       ButterflyRotation_C(dst, i + 32, 63 - i,
179                           63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
180                           range);
181     }
182   }
183   // stage 3
184   if (size_log2 >= 5) {
185     for (int i = 0; i < 8; ++i) {
186       ButterflyRotation_C(dst, i + 16, 31 - i,
187                           6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
188                           range);
189     }
190   }
191   // stage 4.
192   if (size_log2 == 6) {
193     for (int i = 0; i < 16; ++i) {
194       HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
195                          static_cast<bool>(i & 1), range);
196     }
197   }
198   // stage 5.
199   if (size_log2 >= 4) {
200     for (int i = 0; i < 4; ++i) {
201       ButterflyRotation_C(dst, i + 8, 15 - i,
202                           12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
203                           range);
204     }
205   }
206   // stage 6.
207   if (size_log2 >= 5) {
208     for (int i = 0; i < 8; ++i) {
209       HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
210                          static_cast<bool>(i & 1), range);
211     }
212   }
213   // stage 7.
214   if (size_log2 == 6) {
215     for (int i = 0; i < 4; ++i) {
216       for (int j = 0; j < 2; ++j) {
217         ButterflyRotation_C(
218             dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
219             60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
220             range);
221       }
222     }
223   }
224   // stage 8.
225   if (size_log2 >= 3) {
226     for (int i = 0; i < 2; ++i) {
227       ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
228     }
229   }
230   // stage 9.
231   if (size_log2 >= 4) {
232     for (int i = 0; i < 4; ++i) {
233       HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
234                          static_cast<bool>(i & 1), range);
235     }
236   }
237   // stage 10.
238   if (size_log2 >= 5) {
239     for (int i = 0; i < 2; ++i) {
240       for (int j = 0; j < 2; ++j) {
241         ButterflyRotation_C(
242             dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
243             24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
244       }
245     }
246   }
247   // stage 11.
248   if (size_log2 == 6) {
249     for (int i = 0; i < 8; ++i) {
250       for (int j = 0; j < 2; ++j) {
251         HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
252                            MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
253                            range);
254       }
255     }
256   }
257   // stage 12.
258   for (int i = 0; i < 2; ++i) {
259     ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
260                         i == 0, range);
261   }
262   // stage 13.
263   if (size_log2 >= 3) {
264     for (int i = 0; i < 2; ++i) {
265       HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
266                          /*flip=*/i != 0, range);
267     }
268   }
269   // stage 14.
270   if (size_log2 >= 4) {
271     for (int i = 0; i < 2; ++i) {
272       ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
273     }
274   }
275   // stage 15.
276   if (size_log2 >= 5) {
277     for (int i = 0; i < 4; ++i) {
278       for (int j = 0; j < 2; ++j) {
279         HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
280                            MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
281                            range);
282       }
283     }
284   }
285   // stage 16.
286   if (size_log2 == 6) {
287     for (int i = 0; i < 2; ++i) {
288       for (int j = 0; j < 4; ++j) {
289         ButterflyRotation_C(
290             dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
291             56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
292       }
293     }
294   }
295   // stage 17.
296   for (int i = 0; i < 2; ++i) {
297     HadamardRotation_C(dst, i, 3 - i, false, range);
298   }
299   // stage 18.
300   if (size_log2 >= 3) {
301     ButterflyRotation_C(dst, 6, 5, 32, true, range);
302   }
303   // stage 19.
304   if (size_log2 >= 4) {
305     for (int i = 0; i < 2; ++i) {
306       for (int j = 0; j < 2; ++j) {
307         HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
308                            /*flip=*/i != 0, range);
309       }
310     }
311   }
312   // stage 20.
313   if (size_log2 >= 5) {
314     for (int i = 0; i < 4; ++i) {
315       ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
316                           range);
317     }
318   }
319   // stage 21.
320   if (size_log2 == 6) {
321     for (int i = 0; i < 4; ++i) {
322       for (int j = 0; j < 4; ++j) {
323         HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
324                            MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
325                            range);
326       }
327     }
328   }
329   // stage 22.
330   if (size_log2 >= 3) {
331     for (int i = 0; i < 4; ++i) {
332       HadamardRotation_C(dst, i, 7 - i, false, range);
333     }
334   }
335   // stage 23.
336   if (size_log2 >= 4) {
337     for (int i = 0; i < 2; ++i) {
338       ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
339     }
340   }
341   // stage 24.
342   if (size_log2 >= 5) {
343     for (int i = 0; i < 2; ++i) {
344       for (int j = 0; j < 4; ++j) {
345         HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
346                            MultiplyBy8(i) - j + 23, i == 1, range);
347       }
348     }
349   }
350   // stage 25.
351   if (size_log2 == 6) {
352     for (int i = 0; i < 8; ++i) {
353       ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
354     }
355   }
356   // stage 26.
357   if (size_log2 >= 4) {
358     for (int i = 0; i < 8; ++i) {
359       HadamardRotation_C(dst, i, 15 - i, false, range);
360     }
361   }
362   // stage 27.
363   if (size_log2 >= 5) {
364     for (int i = 0; i < 4; ++i) {
365       ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
366     }
367   }
368   // stage 28.
369   if (size_log2 == 6) {
370     for (int i = 0; i < 8; ++i) {
371       HadamardRotation_C(dst, i + 32, 47 - i, false, range);
372       HadamardRotation_C(dst, i + 48, 63 - i, true, range);
373     }
374   }
375   // stage 29.
376   if (size_log2 >= 5) {
377     for (int i = 0; i < 16; ++i) {
378       HadamardRotation_C(dst, i, 31 - i, false, range);
379     }
380   }
381   // stage 30.
382   if (size_log2 == 6) {
383     for (int i = 0; i < 8; ++i) {
384       ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
385     }
386   }
387   // stage 31.
388   if (size_log2 == 6) {
389     for (int i = 0; i < 32; ++i) {
390       HadamardRotation_C(dst, i, 63 - i, false, range);
391     }
392   }
393 }
394 
395 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)396 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
397                  bool is_row) {
398   auto* const dst = static_cast<Residual*>(dest);
399 
400   if (is_row && should_round) {
401     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
402   }
403 
404   ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
405 
406   if (is_row && row_shift > 0) {
407     dst[0] = RightShiftWithRounding(dst[0], row_shift);
408   }
409 
410   ClampIntermediate<bitdepth, Residual>(dst, 1);
411 
412   const int size = 1 << size_log2;
413   for (int i = 1; i < size; ++i) {
414     dst[i] = dst[0];
415   }
416 }
417 
418 //------------------------------------------------------------------------------
419 // Asymmetric Discrete Sine Transforms (ADST).
420 
421 /*
422  * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
423  * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
424  */
425 template <typename Residual>
Adst4_C(void * dest,int8_t range)426 void Adst4_C(void* dest, int8_t range) {
427   auto* const dst = static_cast<Residual*>(dest);
428   if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
429     return;
430   }
431 
432   // stage 1.
433   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
434   // values stored in the s and x arrays by this process are representable by
435   // a signed integer using range + 12 bits of precision.
436   int32_t s[7];
437   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
438   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
439   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
440   s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
441   s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
442   s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
443   s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
444   // stage 2.
445   // Section 7.13.2.6: It is a requirement of bitstream conformance that
446   // values stored in the variable a7 by this process are representable by a
447   // signed integer using range + 1 bits of precision.
448   const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
449   // Section 7.13.2.6: It is a requirement of bitstream conformance that
450   // values stored in the variable b7 by this process are representable by a
451   // signed integer using |range| bits of precision.
452   const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
453   // stage 3.
454   s[0] = RangeCheckValue(s[0] + s[3], range + 12);
455   s[1] = RangeCheckValue(s[1] - s[4], range + 12);
456   s[3] = s[2];
457   s[2] = RangeCheckValue(kAdst4Multiplier[2] * b7, range + 12);
458   // stage 4.
459   s[0] = RangeCheckValue(s[0] + s[5], range + 12);
460   s[1] = RangeCheckValue(s[1] - s[6], range + 12);
461   // stages 5 and 6.
462   const int32_t x0 = RangeCheckValue(s[0] + s[3], range + 12);
463   const int32_t x1 = RangeCheckValue(s[1] + s[3], range + 12);
464   int32_t x3 = RangeCheckValue(s[0] + s[1], range + 12);
465   x3 = RangeCheckValue(x3 - s[3], range + 12);
466   int32_t dst_0 = RightShiftWithRounding(x0, 12);
467   int32_t dst_1 = RightShiftWithRounding(x1, 12);
468   int32_t dst_2 = RightShiftWithRounding(s[2], 12);
469   int32_t dst_3 = RightShiftWithRounding(x3, 12);
470   if (sizeof(Residual) == 2) {
471     // If the first argument to RightShiftWithRounding(..., 12) is only
472     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
473     // in RightShiftWithRounding(..., 12) will cause the function to return
474     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
475     dst_0 -= (dst_0 == 0x8000);
476     dst_1 -= (dst_1 == 0x8000);
477     dst_3 -= (dst_3 == 0x8000);
478   }
479   dst[0] = dst_0;
480   dst[1] = dst_1;
481   dst[2] = dst_2;
482   dst[3] = dst_3;
483 }
484 
485 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)486 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
487                    bool is_row) {
488   auto* const dst = static_cast<Residual*>(dest);
489 
490   if (is_row && should_round) {
491     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
492   }
493 
494   // stage 1.
495   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
496   // values stored in the s and x arrays by this process are representable by
497   // a signed integer using range + 12 bits of precision.
498   int32_t s[3];
499   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
500   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
501   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
502   // stage 3.
503   // stage 4.
504   // stages 5 and 6.
505   int32_t dst_0 = RightShiftWithRounding(s[0], 12);
506   int32_t dst_1 = RightShiftWithRounding(s[1], 12);
507   int32_t dst_2 = RightShiftWithRounding(s[2], 12);
508   int32_t dst_3 =
509       RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
510   if (sizeof(Residual) == 2) {
511     // If the first argument to RightShiftWithRounding(..., 12) is only
512     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
513     // in RightShiftWithRounding(..., 12) will cause the function to return
514     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
515     dst_0 -= (dst_0 == 0x8000);
516     dst_1 -= (dst_1 == 0x8000);
517     dst_3 -= (dst_3 == 0x8000);
518   }
519   dst[0] = dst_0;
520   dst[1] = dst_1;
521   dst[2] = dst_2;
522   dst[3] = dst_3;
523 
524   const int size = 4;
525   if (is_row && row_shift > 0) {
526     for (int j = 0; j < size; ++j) {
527       dst[j] = RightShiftWithRounding(dst[j], row_shift);
528     }
529   }
530 
531   ClampIntermediate<bitdepth, Residual>(dst, 4);
532 }
533 
534 template <typename Residual>
AdstInputPermutation(int32_t * LIBGAV1_RESTRICT const dst,const Residual * LIBGAV1_RESTRICT const src,int n)535 void AdstInputPermutation(int32_t* LIBGAV1_RESTRICT const dst,
536                           const Residual* LIBGAV1_RESTRICT const src, int n) {
537   assert(n == 8 || n == 16);
538   for (int i = 0; i < n; ++i) {
539     dst[i] = src[((i & 1) == 0) ? n - i - 1 : i - 1];
540   }
541 }
542 
543 constexpr int8_t kAdstOutputPermutationLookup[16] = {
544     0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};
545 
546 template <typename Residual>
AdstOutputPermutation(Residual * LIBGAV1_RESTRICT const dst,const int32_t * LIBGAV1_RESTRICT const src,int n)547 void AdstOutputPermutation(Residual* LIBGAV1_RESTRICT const dst,
548                            const int32_t* LIBGAV1_RESTRICT const src, int n) {
549   assert(n == 8 || n == 16);
550   const auto shift = static_cast<int8_t>(n == 8);
551   for (int i = 0; i < n; ++i) {
552     const int8_t index = kAdstOutputPermutationLookup[i] >> shift;
553     int32_t dst_i = ((i & 1) == 0) ? src[index] : -src[index];
554     if (sizeof(Residual) == 2) {
555       // If i is odd and src[index] is -32768, dst_i will be 32768, which
556       // cannot be represented as an int16_t.
557       dst_i -= (dst_i == 0x8000);
558     }
559     dst[i] = dst_i;
560   }
561 }
562 
563 template <typename Residual>
Adst8_C(void * dest,int8_t range)564 void Adst8_C(void* dest, int8_t range) {
565   auto* const dst = static_cast<Residual*>(dest);
566   // stage 1.
567   int32_t temp[8];
568   AdstInputPermutation(temp, dst, 8);
569   // stage 2.
570   for (int i = 0; i < 4; ++i) {
571     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
572                         true, range);
573   }
574   // stage 3.
575   for (int i = 0; i < 4; ++i) {
576     HadamardRotation_C(temp, i, i + 4, false, range);
577   }
578   // stage 4.
579   for (int i = 0; i < 2; ++i) {
580     ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
581   }
582   // stage 5.
583   for (int i = 0; i < 2; ++i) {
584     for (int j = 0; j < 2; ++j) {
585       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
586                          false, range);
587     }
588   }
589   // stage 6.
590   for (int i = 0; i < 2; ++i) {
591     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
592                         range);
593   }
594   // stage 7.
595   AdstOutputPermutation(dst, temp, 8);
596 }
597 
598 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)599 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
600                    bool is_row) {
601   auto* const dst = static_cast<Residual*>(dest);
602 
603   // stage 1.
604   int32_t temp[8];
605   // After the permutation, the dc value is in temp[1]. The remaining are zero.
606   AdstInputPermutation(temp, dst, 8);
607 
608   if (is_row && should_round) {
609     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
610   }
611 
612   // stage 2.
613   ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
614 
615   // stage 3.
616   temp[4] = temp[0];
617   temp[5] = temp[1];
618 
619   // stage 4.
620   ButterflyRotation_C(temp, 4, 5, 48, true, range);
621 
622   // stage 5.
623   temp[2] = temp[0];
624   temp[3] = temp[1];
625   temp[6] = temp[4];
626   temp[7] = temp[5];
627 
628   // stage 6.
629   ButterflyRotation_C(temp, 2, 3, 32, true, range);
630   ButterflyRotation_C(temp, 6, 7, 32, true, range);
631 
632   // stage 7.
633   AdstOutputPermutation(dst, temp, 8);
634 
635   const int size = 8;
636   if (is_row && row_shift > 0) {
637     for (int j = 0; j < size; ++j) {
638       dst[j] = RightShiftWithRounding(dst[j], row_shift);
639     }
640   }
641 
642   ClampIntermediate<bitdepth, Residual>(dst, 8);
643 }
644 
645 template <typename Residual>
Adst16_C(void * dest,int8_t range)646 void Adst16_C(void* dest, int8_t range) {
647   auto* const dst = static_cast<Residual*>(dest);
648   // stage 1.
649   int32_t temp[16];
650   AdstInputPermutation(temp, dst, 16);
651   // stage 2.
652   for (int i = 0; i < 8; ++i) {
653     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
654                         true, range);
655   }
656   // stage 3.
657   for (int i = 0; i < 8; ++i) {
658     HadamardRotation_C(temp, i, i + 8, false, range);
659   }
660   // stage 4.
661   for (int i = 0; i < 2; ++i) {
662     ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
663                         56 - 32 * i, true, range);
664     ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
665                         8 + 32 * i, true, range);
666   }
667   // stage 5.
668   for (int i = 0; i < 4; ++i) {
669     for (int j = 0; j < 2; ++j) {
670       HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
671                          false, range);
672     }
673   }
674   // stage 6.
675   for (int i = 0; i < 2; ++i) {
676     for (int j = 0; j < 2; ++j) {
677       ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
678                           i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
679     }
680   }
681   // stage 7.
682   for (int i = 0; i < 2; ++i) {
683     for (int j = 0; j < 4; ++j) {
684       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
685                          false, range);
686     }
687   }
688   // stage 8.
689   for (int i = 0; i < 4; ++i) {
690     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
691                         range);
692   }
693   // stage 9.
694   AdstOutputPermutation(dst, temp, 16);
695 }
696 
697 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)698 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
699                     bool is_row) {
700   auto* const dst = static_cast<Residual*>(dest);
701 
702   // stage 1.
703   int32_t temp[16];
704   // After the permutation, the dc value is in temp[1].  The remaining are zero.
705   AdstInputPermutation(temp, dst, 16);
706 
707   if (is_row && should_round) {
708     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
709   }
710 
711   // stage 2.
712   ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
713 
714   // stage 3.
715   temp[8] = temp[0];
716   temp[9] = temp[1];
717 
718   // stage 4.
719   ButterflyRotation_C(temp, 8, 9, 56, true, range);
720 
721   // stage 5.
722   temp[4] = temp[0];
723   temp[5] = temp[1];
724   temp[12] = temp[8];
725   temp[13] = temp[9];
726 
727   // stage 6.
728   ButterflyRotation_C(temp, 4, 5, 48, true, range);
729   ButterflyRotation_C(temp, 12, 13, 48, true, range);
730 
731   // stage 7.
732   temp[2] = temp[0];
733   temp[3] = temp[1];
734   temp[10] = temp[8];
735   temp[11] = temp[9];
736 
737   temp[6] = temp[4];
738   temp[7] = temp[5];
739   temp[14] = temp[12];
740   temp[15] = temp[13];
741 
742   // stage 8.
743   for (int i = 0; i < 4; ++i) {
744     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
745                         range);
746   }
747 
748   // stage 9.
749   AdstOutputPermutation(dst, temp, 16);
750 
751   const int size = 16;
752   if (is_row && row_shift > 0) {
753     for (int j = 0; j < size; ++j) {
754       dst[j] = RightShiftWithRounding(dst[j], row_shift);
755     }
756   }
757 
758   ClampIntermediate<bitdepth, Residual>(dst, 16);
759 }
760 
761 //------------------------------------------------------------------------------
762 // Identity Transforms.
763 //
764 // In the spec, the inverse identity transform is followed by a Round2() call:
765 //   The row transforms with i = 0..(h-1) are applied as follows:
766 //     ...
767 //     * Otherwise, invoke the inverse identity transform process specified in
768 //       section 7.13.2.15 with the input variable n equal to log2W.
769 //     * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
770 //       for j = 0..(w-1).
771 //   ...
772 //   The column transforms with j = 0..(w-1) are applied as follows:
773 //     ...
774 //     * Otherwise, invoke the inverse identity transform process specified in
775 //       section 7.13.2.15 with the input variable n equal to log2H.
776 //     * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
777 //       for i = 0..(h-1).
778 //
779 // Therefore, we define the identity transform functions to perform both the
780 // inverse identity transform and the Round2() call. This has two advantages:
781 // 1. The outputs of the inverse identity transform do not need to be stored
782 //    in the Residual array. They can be stored in int32_t local variables,
783 //    which have a larger range if Residual is an int16_t array.
784 // 2. The inverse identity transform and the Round2() call can be jointly
785 //    optimized.
786 //
787 // The identity transform functions have the following prototype:
788 //   void Identity_C(void* dest, int8_t shift);
789 //
790 // The |shift| parameter is the amount of shift for the Round2() call. For row
791 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
792 // 4. Therefore, an identity transform function can detect whether it is being
793 // invoked as a row transform or a column transform by checking whether |shift|
794 // is equal to 4.
795 //
796 // Input Range
797 //
798 // The inputs of row transforms, stored in the 2D array Dequant, are
799 // representable by a signed integer using 8 + BitDepth bits of precision:
800 //   f. Dequant[ i ][ j ] is set equal to
801 //   Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
802 //
803 // The inputs of column transforms are representable by a signed integer using
804 // Max( BitDepth + 6, 16 ) bits of precision:
805 //   Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
806 //   ...
807 //   Between the row and column transforms, Residual[ i ][ j ] is set equal to
808 //   Clip3( - ( 1 << ( colClampRange - 1 ) ),
809 //          ( 1 << (colClampRange - 1 ) ) - 1,
810 //          Residual[ i ][ j ] )
811 //   for i = 0..(h-1), for j = 0..(w-1).
812 //
813 // Output Range
814 //
815 // The outputs of row transforms are representable by a signed integer using
816 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
817 // of the multiplicative factor of inverse identity transforms minus the
818 // smallest row shift is an increase of at most one bit.
819 //
820 // Transform | Multiplicative factor | Smallest row | Net increase
821 // width     | (in bits)             | shift        | in bits
822 // ---------------------------------------------------------------
823 //     4     |  sqrt(2)  (0.5 bits)  |      0       |    +0.5
824 //     8     |     2     (1 bit)     |      0       |    +1
825 //    16     | 2*sqrt(2) (1.5 bits)  |      1       |    +0.5
826 //    32     |     4     (2 bits)    |      1       |    +1
827 //
828 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
829 // clip the outputs (which have 17 bits of precision) to the range of int16_t
830 // before storing them in the Residual array. This clipping happens to be the
831 // same as the required clipping after the row transform (see the spec quoted
832 // above), so we remain compliant with the spec. (In this case,
833 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
834 // duplication of effort.)
835 //
836 // The outputs of column transforms are representable by a signed integer using
837 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
838 // because the multiplicative factor of inverse identity transforms is at most
839 // 4 (2 bits) and |shift| is always 4.
840 
841 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)842 void Identity4Row_C(void* dest, int8_t shift) {
843   assert(shift == 0 || shift == 1);
844   auto* const dst = static_cast<Residual*>(dest);
845   // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
846   // should be (1 + (1 << 1)) << 11. The following expression works for both
847   // values of |shift|.
848   const int32_t rounding = (1 + (shift << 1)) << 11;
849   for (int i = 0; i < 4; ++i) {
850     // The intermediate value here will have to fit into an int32_t for it to be
851     // bitstream conformant. The multiplication is promoted to int32_t by
852     // defining kIdentity4Multiplier as int32_t.
853     int32_t dst_i = (dst[i] * kIdentity4Multiplier + rounding) >> (12 + shift);
854     if (sizeof(Residual) == 2) {
855       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
856     }
857     dst[i] = static_cast<Residual>(dst_i);
858   }
859 }
860 
861 template <typename Residual>
Identity4Column_C(void * dest,int8_t)862 void Identity4Column_C(void* dest, int8_t /*shift*/) {
863   auto* const dst = static_cast<Residual*>(dest);
864   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
865   for (int i = 0; i < 4; ++i) {
866     // The intermediate value here will have to fit into an int32_t for it to be
867     // bitstream conformant. The multiplication is promoted to int32_t by
868     // defining kIdentity4Multiplier as int32_t.
869     dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
870                                    (12 + kTransformColumnShift));
871   }
872 }
873 
874 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)875 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
876                        int row_shift, bool is_row) {
877   auto* const dst = static_cast<Residual*>(dest);
878 
879   if (is_row) {
880     if (should_round) {
881       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
882     }
883 
884     const int32_t rounding = (1 + (row_shift << 1)) << 11;
885     int32_t dst_i =
886         (dst[0] * kIdentity4Multiplier + rounding) >> (12 + row_shift);
887     if (sizeof(Residual) == 2) {
888       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
889     }
890     dst[0] = static_cast<Residual>(dst_i);
891 
892     ClampIntermediate<bitdepth, Residual>(dst, 1);
893     return;
894   }
895 
896   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
897   dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
898                                  (12 + kTransformColumnShift));
899 }
900 
901 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)902 void Identity8Row_C(void* dest, int8_t shift) {
903   assert(shift == 0 || shift == 1 || shift == 2);
904   auto* const dst = static_cast<Residual*>(dest);
905   for (int i = 0; i < 8; ++i) {
906     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
907     if (sizeof(Residual) == 2) {
908       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
909     }
910     dst[i] = static_cast<Residual>(dst_i);
911   }
912 }
913 
914 template <typename Residual>
Identity8Column_C(void * dest,int8_t)915 void Identity8Column_C(void* dest, int8_t /*shift*/) {
916   auto* const dst = static_cast<Residual*>(dest);
917   for (int i = 0; i < 8; ++i) {
918     dst[i] = static_cast<Residual>(
919         RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
920   }
921 }
922 
923 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)924 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
925                        int row_shift, bool is_row) {
926   auto* const dst = static_cast<Residual*>(dest);
927 
928   if (is_row) {
929     if (should_round) {
930       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
931     }
932 
933     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
934     if (sizeof(Residual) == 2) {
935       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
936     }
937     dst[0] = static_cast<Residual>(dst_i);
938 
939     // If Residual is int16_t (which implies bitdepth is 8), we don't need to
940     // clip residual[i][j] to 16 bits.
941     if (sizeof(Residual) > 2) {
942       const Residual intermediate_clamp_max =
943           (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
944       const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
945       dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
946     }
947     return;
948   }
949 
950   dst[0] = static_cast<Residual>(
951       RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
952 }
953 
954 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)955 void Identity16Row_C(void* dest, int8_t shift) {
956   assert(shift == 1 || shift == 2);
957   auto* const dst = static_cast<Residual*>(dest);
958   const int32_t rounding = (1 + (1 << shift)) << 11;
959   for (int i = 0; i < 16; ++i) {
960     // The intermediate value here will have to fit into an int32_t for it to be
961     // bitstream conformant. The multiplication is promoted to int32_t by
962     // defining kIdentity16Multiplier as int32_t.
963     int32_t dst_i = (dst[i] * kIdentity16Multiplier + rounding) >> (12 + shift);
964     if (sizeof(Residual) == 2) {
965       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
966     }
967     dst[i] = static_cast<Residual>(dst_i);
968   }
969 }
970 
971 template <typename Residual>
Identity16Column_C(void * dest,int8_t)972 void Identity16Column_C(void* dest, int8_t /*shift*/) {
973   auto* const dst = static_cast<Residual*>(dest);
974   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
975   for (int i = 0; i < 16; ++i) {
976     // The intermediate value here will have to fit into an int32_t for it to be
977     // bitstream conformant. The multiplication is promoted to int32_t by
978     // defining kIdentity16Multiplier as int32_t.
979     dst[i] =
980         static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
981                               (12 + kTransformColumnShift));
982   }
983 }
984 
985 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)986 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
987                         int row_shift, bool is_row) {
988   auto* const dst = static_cast<Residual*>(dest);
989 
990   if (is_row) {
991     if (should_round) {
992       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
993     }
994 
995     const int32_t rounding = (1 + (1 << row_shift)) << 11;
996     int32_t dst_i =
997         (dst[0] * kIdentity16Multiplier + rounding) >> (12 + row_shift);
998     if (sizeof(Residual) == 2) {
999       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1000     }
1001     dst[0] = static_cast<Residual>(dst_i);
1002 
1003     ClampIntermediate<bitdepth, Residual>(dst, 1);
1004     return;
1005   }
1006 
1007   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1008   dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1009                                  (12 + kTransformColumnShift));
1010 }
1011 
1012 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1013 void Identity32Row_C(void* dest, int8_t shift) {
1014   assert(shift == 1 || shift == 2);
1015   auto* const dst = static_cast<Residual*>(dest);
1016   for (int i = 0; i < 32; ++i) {
1017     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1018     if (sizeof(Residual) == 2) {
1019       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1020     }
1021     dst[i] = static_cast<Residual>(dst_i);
1022   }
1023 }
1024 
1025 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1026 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1027   auto* const dst = static_cast<Residual*>(dest);
1028   for (int i = 0; i < 32; ++i) {
1029     dst[i] = static_cast<Residual>(
1030         RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1031   }
1032 }
1033 
1034 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1035 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1036                         int row_shift, bool is_row) {
1037   auto* const dst = static_cast<Residual*>(dest);
1038 
1039   if (is_row) {
1040     if (should_round) {
1041       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
1042     }
1043 
1044     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1045     if (sizeof(Residual) == 2) {
1046       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1047     }
1048     dst[0] = static_cast<Residual>(dst_i);
1049 
1050     ClampIntermediate<bitdepth, Residual>(dst, 1);
1051     return;
1052   }
1053 
1054   dst[0] = static_cast<Residual>(
1055       RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1056 }
1057 
1058 //------------------------------------------------------------------------------
1059 // Walsh Hadamard Transform.
1060 
// 4-point inverse Walsh-Hadamard transform, applied in place to the 4
// Residual values at |dest|. Every input is first shifted right by |shift|.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  Residual* const coeffs = static_cast<Residual*>(dest);
  // Note the input permutation: a, c, d, b are read from indices 0, 1, 2, 3.
  Residual a = coeffs[0] >> shift;
  Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  Residual b = coeffs[3] >> shift;
  a += c;
  d -= b;
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;
  coeffs[1] = e - b;
  coeffs[2] = e - c;
  // The outer outputs are built from the two inner outputs just written.
  coeffs[0] = a - coeffs[1];
  coeffs[3] = d + coeffs[2];
}
1078 
1079 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1080 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1081                   int /*row_shift*/, bool /*is_row*/) {
1082   auto* const dst = static_cast<Residual*>(dest);
1083   const int shift = range;
1084 
1085   Residual temp = dst[0] >> shift;
1086   // This signed right shift must be an arithmetic shift.
1087   Residual e = temp >> 1;
1088   dst[0] = temp - e;
1089   dst[1] = e;
1090   dst[2] = e;
1091   dst[3] = e;
1092 
1093   ClampIntermediate<bitdepth, Residual>(dst, 4);
1094 }
1095 
1096 //------------------------------------------------------------------------------
1097 // row/column transform loop
1098 
// 1D inverse transform applied in place to the buffer at |dest|. The second
// argument is the clamping range for most transforms; TransformLoop_C passes
// the Round2() shift instead when the transform is an identity transform.
using InverseTransform1dFunc = void (*)(void* dest, int8_t range);

// DC-only variant of a 1D inverse transform. |should_round| and |row_shift|
// describe the row pass; |is_row| selects the row or the column pass.
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
                                            bool should_round, int row_shift,
                                            bool is_row);
1103 
1104 template <int bitdepth, typename Residual, typename Pixel,
1105           Transform1d transform1d_type,
1106           InverseTransformDcOnlyFunc dconly_transform1d,
1107           InverseTransform1dFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * LIBGAV1_RESTRICT src_buffer,int start_x,int start_y,void * LIBGAV1_RESTRICT dst_frame)1108 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1109                      int adjusted_tx_height, void* LIBGAV1_RESTRICT src_buffer,
1110                      int start_x, int start_y,
1111                      void* LIBGAV1_RESTRICT dst_frame) {
1112   constexpr bool lossless = transform1d_type == kTransform1dWht;
1113   constexpr bool is_identity = transform1d_type == kTransform1dIdentity;
1114   // The transform size of the WHT is always 4x4. Setting tx_width and
1115   // tx_height to the constant 4 for the WHT speeds the code up.
1116   assert(!lossless || tx_size == kTransformSize4x4);
1117   const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1118   const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1119   const int tx_width_log2 = kTransformWidthLog2[tx_size];
1120   const int tx_height_log2 = kTransformHeightLog2[tx_size];
1121   auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1122 
1123   // Initially this points to the dequantized values. After the transforms are
1124   // applied, this buffer contains the residual.
1125   Array2DView<Residual> residual(tx_height, tx_width,
1126                                  static_cast<Residual*>(src_buffer));
1127 
1128   if (is_row) {
1129     // Row transform.
1130     const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1131     // This is the |range| parameter of the InverseTransform1dFunc.  For lossy
1132     // transforms, this will be equal to the clamping range.
1133     const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1134     // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1135     // the input to the row transform by 1 / sqrt(2), which is approximated by
1136     // the fraction 2896 / 2^12.
1137     const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1138 
1139     if (adjusted_tx_height == 1) {
1140       dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1141                          true);
1142       return;
1143     }
1144 
1145     // Row transforms need to be done only up to 32 because the rest of the rows
1146     // are always all zero if |tx_height| is 64.  Otherwise, only process the
1147     // rows that have a non zero coefficients.
1148     for (int i = 0; i < adjusted_tx_height; ++i) {
1149       // If lossless, the transform size is 4x4, so should_round is false.
1150       if (!lossless && should_round) {
1151         // The last 32 values of every row are always zero if the |tx_width| is
1152         // 64.
1153         for (int j = 0; j < std::min(tx_width, 32); ++j) {
1154           residual[i][j] = RightShiftWithRounding(
1155               residual[i][j] * kTransformRowMultiplier, 12);
1156         }
1157       }
1158       // For identity transform, |transform1d_func| also performs the
1159       // Round2(T[j], rowShift) call in the spec.
1160       transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1161       if (!lossless && !is_identity && row_shift > 0) {
1162         for (int j = 0; j < tx_width; ++j) {
1163           residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1164         }
1165       }
1166 
1167       ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1168     }
1169     return;
1170   }
1171 
1172   assert(!is_row);
1173   constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1174   // This is the |range| parameter of the InverseTransform1dFunc.  For lossy
1175   // transforms, this will be equal to the clamping range.
1176   const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1177   const bool flip_rows = transform1d_type == kTransform1dAdst &&
1178                          kTransformFlipRowsMask.Contains(tx_type);
1179   const bool flip_columns =
1180       !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1181   const int min_value = 0;
1182   const int max_value = (1 << bitdepth) - 1;
1183   // Note: 64 is the maximum size of a 1D transform buffer (the largest
1184   // transform size is kTransformSize64x64).
1185   Residual tx_buffer[64];
1186   for (int j = 0; j < tx_width; ++j) {
1187     const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1188     int i = 0;
1189     do {
1190       tx_buffer[i] = residual[i][flipped_j];
1191     } while (++i != tx_height);
1192     if (adjusted_tx_height == 1) {
1193       dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1194     } else {
1195       // For identity transform, |transform1d_func| also performs the
1196       // Round2(T[i], colShift) call in the spec.
1197       transform1d_func(tx_buffer,
1198                        is_identity ? column_shift : column_clamp_range);
1199     }
1200     const int x = start_x + j;
1201     for (int i = 0; i < tx_height; ++i) {
1202       const int y = start_y + i;
1203       const int index = flip_rows ? tx_height - i - 1 : i;
1204       Residual residual_value = tx_buffer[index];
1205       if (!lossless && !is_identity) {
1206         residual_value = RightShiftWithRounding(residual_value, column_shift);
1207       }
1208       (*frame)[y][x] =
1209           Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1210     }
1211   }
1212 }
1213 
1214 //------------------------------------------------------------------------------
1215 
1216 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1217 template <int bitdepth, typename Residual, typename Pixel>
InitAll(Dsp * const dsp)1218 void InitAll(Dsp* const dsp) {
1219   // Maximum transform size for Dct is 64.
1220   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1221       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1222                       DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1223                       /*is_row=*/true>;
1224   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1225       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1226                       DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
1227                       /*is_row=*/false>;
1228   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1229       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1230                       DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1231                       /*is_row=*/true>;
1232   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1233       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1234                       DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
1235                       /*is_row=*/false>;
1236   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1237       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1238                       DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1239                       /*is_row=*/true>;
1240   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1241       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1242                       DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
1243                       /*is_row=*/false>;
1244   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1245       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1246                       DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1247                       /*is_row=*/true>;
1248   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1249       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1250                       DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
1251                       /*is_row=*/false>;
1252   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1253       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1254                       DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1255                       /*is_row=*/true>;
1256   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1257       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dDct,
1258                       DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
1259                       /*is_row=*/false>;
1260 
1261   // Maximum transform size for Adst is 16.
1262   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1263       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1264                       Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1265                       /*is_row=*/true>;
1266   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1267       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1268                       Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
1269                       /*is_row=*/false>;
1270   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1271       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1272                       Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1273                       /*is_row=*/true>;
1274   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1275       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1276                       Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
1277                       /*is_row=*/false>;
1278   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1279       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1280                       Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1281                       /*is_row=*/true>;
1282   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1283       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dAdst,
1284                       Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
1285                       /*is_row=*/false>;
1286 
1287   // Maximum transform size for Identity transform is 32.
1288   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1289       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1290                       Identity4DcOnly_C<bitdepth, Residual>,
1291                       Identity4Row_C<Residual>, /*is_row=*/true>;
1292   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1293       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1294                       Identity4DcOnly_C<bitdepth, Residual>,
1295                       Identity4Column_C<Residual>, /*is_row=*/false>;
1296   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1297       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1298                       Identity8DcOnly_C<bitdepth, Residual>,
1299                       Identity8Row_C<Residual>, /*is_row=*/true>;
1300   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1301       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1302                       Identity8DcOnly_C<bitdepth, Residual>,
1303                       Identity8Column_C<Residual>, /*is_row=*/false>;
1304   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1305       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1306                       Identity16DcOnly_C<bitdepth, Residual>,
1307                       Identity16Row_C<Residual>, /*is_row=*/true>;
1308   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1309       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1310                       Identity16DcOnly_C<bitdepth, Residual>,
1311                       Identity16Column_C<Residual>, /*is_row=*/false>;
1312   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1313       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1314                       Identity32DcOnly_C<bitdepth, Residual>,
1315                       Identity32Row_C<Residual>, /*is_row=*/true>;
1316   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1317       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dIdentity,
1318                       Identity32DcOnly_C<bitdepth, Residual>,
1319                       Identity32Column_C<Residual>, /*is_row=*/false>;
1320 
1321   // Maximum transform size for Wht is 4.
1322   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1323       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1324                       Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1325                       /*is_row=*/true>;
1326   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1327       TransformLoop_C<bitdepth, Residual, Pixel, kTransform1dWht,
1328                       Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
1329                       /*is_row=*/false>;
1330 }
1331 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1332 
Init8bpp()1333 void Init8bpp() {
1334   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1335   assert(dsp != nullptr);
1336   static_cast<void>(dsp);
1337 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1338   InitAll<8, int16_t, uint8_t>(dsp);
1339 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1340 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dDct
1341   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1342       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1343                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1344                       /*is_row=*/true>;
1345   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1346       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1347                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1348                       /*is_row=*/false>;
1349 #endif
1350 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dDct
1351   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1352       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1353                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1354                       /*is_row=*/true>;
1355   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1356       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1357                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1358                       /*is_row=*/false>;
1359 #endif
1360 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dDct
1361   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1362       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1363                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1364                       /*is_row=*/true>;
1365   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1366       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1367                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1368                       /*is_row=*/false>;
1369 #endif
1370 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dDct
1371   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1372       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1373                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1374                       /*is_row=*/true>;
1375   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1376       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1377                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1378                       /*is_row=*/false>;
1379 #endif
1380 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize64_Transform1dDct
1381   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1382       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1383                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1384                       /*is_row=*/true>;
1385   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1386       TransformLoop_C<8, int16_t, uint8_t, kTransform1dDct,
1387                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1388                       /*is_row=*/false>;
1389 #endif
1390 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dAdst
1391   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1392       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1393                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1394                       /*is_row=*/true>;
1395   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1396       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1397                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1398                       /*is_row=*/false>;
1399 #endif
1400 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dAdst
1401   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1402       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1403                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1404                       /*is_row=*/true>;
1405   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1406       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1407                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1408                       /*is_row=*/false>;
1409 #endif
1410 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dAdst
1411   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1412       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1413                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1414                       /*is_row=*/true>;
1415   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1416       TransformLoop_C<8, int16_t, uint8_t, kTransform1dAdst,
1417                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1418                       /*is_row=*/false>;
1419 #endif
1420 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dIdentity
1421   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1422       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1423                       Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1424                       /*is_row=*/true>;
1425   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1426       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1427                       Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1428                       /*is_row=*/false>;
1429 #endif
1430 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize8_Transform1dIdentity
1431   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1432       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1433                       Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1434                       /*is_row=*/true>;
1435   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1436       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1437                       Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1438                       /*is_row=*/false>;
1439 #endif
1440 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize16_Transform1dIdentity
1441   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1442       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1443                       Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1444                       /*is_row=*/true>;
1445   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1446       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1447                       Identity16DcOnly_C<8, int16_t>,
1448                       Identity16Column_C<int16_t>, /*is_row=*/false>;
1449 #endif
1450 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize32_Transform1dIdentity
1451   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1452       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1453                       Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1454                       /*is_row=*/true>;
1455   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1456       TransformLoop_C<8, int16_t, uint8_t, kTransform1dIdentity,
1457                       Identity32DcOnly_C<8, int16_t>,
1458                       Identity32Column_C<int16_t>, /*is_row=*/false>;
1459 #endif
1460 #ifndef LIBGAV1_Dsp8bpp_Transform1dSize4_Transform1dWht
1461   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1462       TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1463                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1464                       /*is_row=*/true>;
1465   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1466       TransformLoop_C<8, int16_t, uint8_t, kTransform1dWht,
1467                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1468                       /*is_row=*/false>;
1469 #endif
1470 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1471 }
1472 
1473 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1474 void Init10bpp() {
1475   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1476   assert(dsp != nullptr);
1477   static_cast<void>(dsp);
1478 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1479   InitAll<10, int32_t, uint16_t>(dsp);
1480 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1481 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dDct
1482   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kRow] =
1483       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1484                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1485                       /*is_row=*/true>;
1486   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize4][kColumn] =
1487       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1488                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1489                       /*is_row=*/false>;
1490 #endif
1491 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dDct
1492   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kRow] =
1493       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1494                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1495                       /*is_row=*/true>;
1496   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize8][kColumn] =
1497       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1498                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1499                       /*is_row=*/false>;
1500 #endif
1501 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dDct
1502   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kRow] =
1503       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1504                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1505                       /*is_row=*/true>;
1506   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize16][kColumn] =
1507       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1508                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1509                       /*is_row=*/false>;
1510 #endif
1511 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dDct
1512   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kRow] =
1513       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1514                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1515                       /*is_row=*/true>;
1516   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize32][kColumn] =
1517       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1518                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1519                       /*is_row=*/false>;
1520 #endif
1521 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize64_Transform1dDct
1522   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kRow] =
1523       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1524                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1525                       /*is_row=*/true>;
1526   dsp->inverse_transforms[kTransform1dDct][kTransform1dSize64][kColumn] =
1527       TransformLoop_C<10, int32_t, uint16_t, kTransform1dDct,
1528                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1529                       /*is_row=*/false>;
1530 #endif
1531 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dAdst
1532   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kRow] =
1533       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1534                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1535                       /*is_row=*/true>;
1536   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize4][kColumn] =
1537       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1538                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1539                       /*is_row=*/false>;
1540 #endif
1541 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dAdst
1542   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kRow] =
1543       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1544                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1545                       /*is_row=*/true>;
1546   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize8][kColumn] =
1547       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1548                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1549                       /*is_row=*/false>;
1550 #endif
1551 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dAdst
1552   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kRow] =
1553       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1554                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1555                       /*is_row=*/true>;
1556   dsp->inverse_transforms[kTransform1dAdst][kTransform1dSize16][kColumn] =
1557       TransformLoop_C<10, int32_t, uint16_t, kTransform1dAdst,
1558                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1559                       /*is_row=*/false>;
1560 #endif
1561 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dIdentity
1562   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kRow] =
1563       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1564                       Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1565                       /*is_row=*/true>;
1566   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize4][kColumn] =
1567       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1568                       Identity4DcOnly_C<10, int32_t>,
1569                       Identity4Column_C<int32_t>, /*is_row=*/false>;
1570 #endif
1571 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize8_Transform1dIdentity
1572   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kRow] =
1573       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1574                       Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1575                       /*is_row=*/true>;
1576   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize8][kColumn] =
1577       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1578                       Identity8DcOnly_C<10, int32_t>,
1579                       Identity8Column_C<int32_t>, /*is_row=*/false>;
1580 #endif
1581 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize16_Transform1dIdentity
1582   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kRow] =
1583       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1584                       Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1585                       /*is_row=*/true>;
1586   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize16][kColumn] =
1587       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1588                       Identity16DcOnly_C<10, int32_t>,
1589                       Identity16Column_C<int32_t>, /*is_row=*/false>;
1590 #endif
1591 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize32_Transform1dIdentity
1592   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kRow] =
1593       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1594                       Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1595                       /*is_row=*/true>;
1596   dsp->inverse_transforms[kTransform1dIdentity][kTransform1dSize32][kColumn] =
1597       TransformLoop_C<10, int32_t, uint16_t, kTransform1dIdentity,
1598                       Identity32DcOnly_C<10, int32_t>,
1599                       Identity32Column_C<int32_t>, /*is_row=*/false>;
1600 #endif
1601 #ifndef LIBGAV1_Dsp10bpp_Transform1dSize4_Transform1dWht
1602   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kRow] =
1603       TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1604                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1605                       /*is_row=*/true>;
1606   dsp->inverse_transforms[kTransform1dWht][kTransform1dSize4][kColumn] =
1607       TransformLoop_C<10, int32_t, uint16_t, kTransform1dWht,
1608                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1609                       /*is_row=*/false>;
1610 #endif
1611 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1612 }
1613 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1614 
1615 }  // namespace
1616 
InverseTransformInit_C()1617 void InverseTransformInit_C() {
1618   Init8bpp();
1619 #if LIBGAV1_MAX_BITDEPTH >= 10
1620   Init10bpp();
1621 #endif
1622 
1623   // Local functions that may be unused depending on the optimizations
1624   // available.
1625   static_cast<void>(RangeCheckValue);
1626   static_cast<void>(kBitReverseLookup);
1627 }
1628 
1629 }  // namespace dsp
1630 }  // namespace libgav1
1631