• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/inverse_transform.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21 
22 #include "src/dsp/dsp.h"
23 #include "src/utils/array_2d.h"
24 #include "src/utils/common.h"
25 #include "src/utils/compiler_attributes.h"
26 #include "src/utils/logging.h"
27 
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31 
32 // Include the constants and utility functions inside the anonymous namespace.
33 #include "src/dsp/inverse_transform.inc"
34 
// Shift amount associated with the column transform pass.
// NOTE(review): presumably the Round2() shift applied after column
// transforms (the identity transform notes later in this file say that shift
// is always 4) -- confirm against the users of this constant.
constexpr uint8_t kTransformColumnShift = 4;

// In fuzzing builds, remove any definition of
// LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK so that the range checking code guarded
// by that macro is compiled out.
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
#undef LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
#endif
40 
// Returns |value| unchanged. When LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK is
// defined and non-zero, additionally verifies that |value| is representable
// as a signed integer using |range| bits of precision (1 <= range <= 32); a
// violation is logged and then trips an assert(). In all other builds this
// function is a pure pass-through.
int32_t RangeCheckValue(int32_t value, int8_t range) {
#if defined(LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK) && \
    LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  assert(range >= 1 && range <= 32);
  // The bounds are computed in 64 bits. With range == 32 the value 1 << 31 is
  // not representable in a 32-bit int, so negating it (for |min|) or
  // subtracting one from it (for |max|) in int arithmetic would overflow.
  const int64_t min = -(int64_t{1} << (range - 1));
  const int64_t max = (int64_t{1} << (range - 1)) - 1;
  if (min > value || value > max) {
    LIBGAV1_DLOG(ERROR, "coeff out of bit range, value: %d bit range %d\n",
                 value, range);
    assert(min <= value && value <= max);
  }
#endif  // LIBGAV1_ENABLE_TRANSFORM_RANGE_CHECK
  // |range| is otherwise unused when the check is compiled out.
  static_cast<void>(range);
  return value;
}
56 
57 template <typename Residual>
ButterflyRotation_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)58 LIBGAV1_ALWAYS_INLINE void ButterflyRotation_C(Residual* const dst, int a,
59                                                int b, int angle, bool flip,
60                                                int8_t range) {
61   // Note that we multiply in 32 bits and then add/subtract the products in 64
62   // bits. The 32-bit multiplications do not overflow. Please see the comment
63   // and assert() in Cos128().
64   const int64_t x = static_cast<int64_t>(dst[a] * Cos128(angle)) -
65                     static_cast<int64_t>(dst[b] * Sin128(angle));
66   const int64_t y = static_cast<int64_t>(dst[a] * Sin128(angle)) +
67                     static_cast<int64_t>(dst[b] * Cos128(angle));
68   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
69   // values saved into the array T by this function are representable by a
70   // signed integer using |range| bits of precision.
71   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
72   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
73 }
74 
75 template <typename Residual>
ButterflyRotationFirstIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)76 void ButterflyRotationFirstIsZero_C(Residual* const dst, int a, int b,
77                                     int angle, bool flip, int8_t range) {
78   // Note that we multiply in 32 bits and then add/subtract the products in 64
79   // bits. The 32-bit multiplications do not overflow. Please see the comment
80   // and assert() in Cos128().
81   const auto x = static_cast<int64_t>(dst[b] * -Sin128(angle));
82   const auto y = static_cast<int64_t>(dst[b] * Cos128(angle));
83   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
84   // values saved into the array T by this function are representable by a
85   // signed integer using |range| bits of precision.
86   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
87   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
88 }
89 
90 template <typename Residual>
ButterflyRotationSecondIsZero_C(Residual * const dst,int a,int b,int angle,bool flip,int8_t range)91 void ButterflyRotationSecondIsZero_C(Residual* const dst, int a, int b,
92                                      int angle, bool flip, int8_t range) {
93   // Note that we multiply in 32 bits and then add/subtract the products in 64
94   // bits. The 32-bit multiplications do not overflow. Please see the comment
95   // and assert() in Cos128().
96   const auto x = static_cast<int64_t>(dst[a] * Cos128(angle));
97   const auto y = static_cast<int64_t>(dst[a] * Sin128(angle));
98 
99   // Section 7.13.2.1: It is a requirement of bitstream conformance that the
100   // values saved into the array T by this function are representable by a
101   // signed integer using |range| bits of precision.
102   dst[a] = RangeCheckValue(RightShiftWithRounding(flip ? y : x, 12), range);
103   dst[b] = RangeCheckValue(RightShiftWithRounding(flip ? x : y, 12), range);
104 }
105 
106 template <typename Residual>
HadamardRotation_C(Residual * const dst,int a,int b,bool flip,int8_t range)107 void HadamardRotation_C(Residual* const dst, int a, int b, bool flip,
108                         int8_t range) {
109   if (flip) std::swap(a, b);
110   --range;
111   // For Adst and Dct, the maximum possible value for range is 20. So min and
112   // max should always fit into int32_t.
113   const int32_t min = -(1 << range);
114   const int32_t max = (1 << range) - 1;
115   const int32_t x = dst[a] + dst[b];
116   const int32_t y = dst[a] - dst[b];
117   dst[a] = Clip3(x, min, max);
118   dst[b] = Clip3(y, min, max);
119 }
120 
// Clips the first |size| entries of |dst| to the signed range with
// Max(bitdepth + 6, 16) bits. Does nothing when Residual is at most two
// bytes wide.
template <int bitdepth, typename Residual>
void ClampIntermediate(Residual* const dst, int size) {
  // A 2-byte Residual (int16_t, which implies bitdepth is 8) cannot hold a
  // value outside 16 bits, so no clipping of residual[i][j] is required.
  if (sizeof(Residual) <= 2) return;

  const Residual upper_bound = (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
  const Residual lower_bound = -upper_bound - 1;
  for (int index = 0; index < size; ++index) {
    Residual& coefficient = dst[index];
    coefficient = Clip3(coefficient, lower_bound, upper_bound);
  }
}
134 
135 //------------------------------------------------------------------------------
136 // Discrete Cosine Transforms (DCT).
137 
// Bit-reversal lookup table used by Dct_C() to permute its input.
//
// The value at index (i, j) is obtained by bit-reversing j and interpreting
// the result as an integer with bit-length i + 2.
// For example, index (2, 3) is computed as follows:
//   * bitreverse(3) = bitreverse(..000011) = 110000...
//   * interpreting that as an integer with bit-length 2+2 = 4 gives 1100 = 12
//
// Every row stores 64 entries. In the rows whose bit-length is below 6 the
// first 2^(i + 2) entries simply repeat to fill the row.
constexpr uint8_t kBitReverseLookup[kNum1DTransformSizes][64] = {
    {0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2,
     1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3,
     0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3},
    {0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5,
     3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6,
     1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7, 0, 4, 2, 6, 1, 5, 3, 7},
    {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
     0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15},
    {0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
     0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
     1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31},
    {0, 32, 16, 48, 8,  40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
     2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
     1, 33, 17, 49, 9,  41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
     3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63}};
162 
// In-place 1D inverse DCT over the first (1 << size_log2) entries of |dest|,
// which is interpreted as an array of Residual. size_log2 must be in [2, 6].
//
// |range| is forwarded unchanged to every ButterflyRotation_C() and
// HadamardRotation_C() call.
//
// A single stage list serves all transform sizes: each stage is guarded by a
// test on size_log2, so a stage only runs when the transform is large enough
// to contain the entries it touches. Stages 12 and 17 are unconditional. The
// order of the stages matters because later stages consume the outputs of
// earlier ones.
template <typename Residual, int size_log2>
void Dct_C(void* dest, int8_t range) {
  static_assert(size_log2 >= 2 && size_log2 <= 6, "");
  auto* const dst = static_cast<Residual*>(dest);
  // stage 1.
  // Permute the input into bit-reversed order. A temporary copy is needed
  // because the permutation is not done element-by-element in place.
  const int size = 1 << size_log2;
  Residual temp[size];
  memcpy(temp, dst, sizeof(temp));
  for (int i = 0; i < size; ++i) {
    dst[i] = temp[kBitReverseLookup[size_log2 - 2][i]];
  }
  // stages 2-31 are dependent on the value of size_log2.
  // stage 2.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      ButterflyRotation_C(dst, i + 32, 63 - i,
                          63 - MultiplyBy4(kBitReverseLookup[2][i]), false,
                          range);
    }
  }
  // stage 3
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, i + 16, 31 - i,
                          6 + MultiplyBy8(kBitReverseLookup[1][7 - i]), false,
                          range);
    }
  }
  // stage 4.
  if (size_log2 == 6) {
    for (int i = 0; i < 16; ++i) {
      // |flip| alternates: it is set for odd i.
      HadamardRotation_C(dst, MultiplyBy2(i) + 32, MultiplyBy2(i) + 33,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 5.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, i + 8, 15 - i,
                          12 + MultiplyBy16(kBitReverseLookup[0][3 - i]), false,
                          range);
    }
  }
  // stage 6.
  if (size_log2 >= 5) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 16, MultiplyBy2(i) + 17,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 7.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 62 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 33,
            60 - MultiplyBy16(kBitReverseLookup[0][i]) + MultiplyBy64(j), true,
            range);
      }
    }
  }
  // stage 8.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, i + 4, 7 - i, 56 - 32 * i, false, range);
    }
  }
  // stage 9.
  if (size_log2 >= 4) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
                         static_cast<bool>(i & 1), range);
    }
  }
  // stage 10.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        ButterflyRotation_C(
            dst, 30 - MultiplyBy4(i) - j, MultiplyBy4(i) + j + 17,
            24 + MultiplyBy64(j) + MultiplyBy32(1 - i), true, range);
      }
    }
  }
  // stage 11.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 32,
                           MultiplyBy4(i) - j + 35, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 12.
  // Runs for every transform size.
  for (int i = 0; i < 2; ++i) {
    ButterflyRotation_C(dst, MultiplyBy2(i), MultiplyBy2(i) + 1, 32 + 16 * i,
                        i == 0, range);
  }
  // stage 13.
  if (size_log2 >= 3) {
    for (int i = 0; i < 2; ++i) {
      HadamardRotation_C(dst, MultiplyBy2(i) + 4, MultiplyBy2(i) + 5,
                         /*flip=*/i != 0, range);
    }
  }
  // stage 14.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 14 - i, i + 9, 48 + 64 * i, true, range);
    }
  }
  // stage 15.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 16,
                           MultiplyBy4(i) - j + 19, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 16.
  if (size_log2 == 6) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        ButterflyRotation_C(
            dst, 61 - MultiplyBy8(i) - j, MultiplyBy8(i) + j + 34,
            56 - MultiplyBy32(i) + MultiplyBy64(DivideBy2(j)), true, range);
      }
    }
  }
  // stage 17.
  // Runs for every transform size.
  for (int i = 0; i < 2; ++i) {
    HadamardRotation_C(dst, i, 3 - i, false, range);
  }
  // stage 18.
  if (size_log2 >= 3) {
    ButterflyRotation_C(dst, 6, 5, 32, true, range);
  }
  // stage 19.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 2; ++j) {
        HadamardRotation_C(dst, MultiplyBy4(i) + j + 8, MultiplyBy4(i) - j + 11,
                           /*flip=*/i != 0, range);
      }
    }
  }
  // stage 20.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 29 - i, i + 18, 48 + 64 * DivideBy2(i), true,
                          range);
    }
  }
  // stage 21.
  if (size_log2 == 6) {
    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 32,
                           MultiplyBy8(i) - j + 39, static_cast<bool>(i & 1),
                           range);
      }
    }
  }
  // stage 22.
  if (size_log2 >= 3) {
    for (int i = 0; i < 4; ++i) {
      HadamardRotation_C(dst, i, 7 - i, false, range);
    }
  }
  // stage 23.
  if (size_log2 >= 4) {
    for (int i = 0; i < 2; ++i) {
      ButterflyRotation_C(dst, 13 - i, i + 10, 32, true, range);
    }
  }
  // stage 24.
  if (size_log2 >= 5) {
    for (int i = 0; i < 2; ++i) {
      for (int j = 0; j < 4; ++j) {
        HadamardRotation_C(dst, MultiplyBy8(i) + j + 16,
                           MultiplyBy8(i) - j + 23, i == 1, range);
      }
    }
  }
  // stage 25.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 59 - i, i + 36, (i < 4) ? 48 : 112, true, range);
    }
  }
  // stage 26.
  if (size_log2 >= 4) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i, 15 - i, false, range);
    }
  }
  // stage 27.
  if (size_log2 >= 5) {
    for (int i = 0; i < 4; ++i) {
      ButterflyRotation_C(dst, 27 - i, i + 20, 32, true, range);
    }
  }
  // stage 28.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      HadamardRotation_C(dst, i + 32, 47 - i, false, range);
      HadamardRotation_C(dst, i + 48, 63 - i, true, range);
    }
  }
  // stage 29.
  if (size_log2 >= 5) {
    for (int i = 0; i < 16; ++i) {
      HadamardRotation_C(dst, i, 31 - i, false, range);
    }
  }
  // stage 30.
  if (size_log2 == 6) {
    for (int i = 0; i < 8; ++i) {
      ButterflyRotation_C(dst, 55 - i, i + 40, 32, true, range);
    }
  }
  // stage 31.
  if (size_log2 == 6) {
    for (int i = 0; i < 32; ++i) {
      HadamardRotation_C(dst, i, 63 - i, false, range);
    }
  }
}
394 
395 template <int bitdepth, typename Residual, int size_log2>
DctDcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)396 void DctDcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
397                  bool is_row) {
398   auto* const dst = static_cast<Residual*>(dest);
399 
400   if (is_row && should_round) {
401     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
402   }
403 
404   ButterflyRotationSecondIsZero_C(dst, 0, 1, 32, true, range);
405 
406   if (is_row && row_shift > 0) {
407     dst[0] = RightShiftWithRounding(dst[0], row_shift);
408   }
409 
410   ClampIntermediate<bitdepth, Residual>(dst, 1);
411 
412   const int size = 1 << size_log2;
413   for (int i = 1; i < size; ++i) {
414     dst[i] = dst[0];
415   }
416 }
417 
418 //------------------------------------------------------------------------------
419 // Asymmetric Discrete Sine Transforms (ADST).
420 
421 /*
422  * Row transform max range in bits for bitdepths 8/10/12: 28/30/32.
423  * Column transform max range in bits for bitdepths 8/10/12: 28/28/30.
424  */
425 template <typename Residual>
Adst4_C(void * dest,int8_t range)426 void Adst4_C(void* dest, int8_t range) {
427   auto* const dst = static_cast<Residual*>(dest);
428   if ((dst[0] | dst[1] | dst[2] | dst[3]) == 0) {
429     return;
430   }
431 
432   // stage 1.
433   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
434   // values stored in the s and x arrays by this process are representable by
435   // a signed integer using range + 12 bits of precision.
436   int32_t s[7];
437   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
438   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
439   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[1], range + 12);
440   s[3] = RangeCheckValue(kAdst4Multiplier[3] * dst[2], range + 12);
441   s[4] = RangeCheckValue(kAdst4Multiplier[0] * dst[2], range + 12);
442   s[5] = RangeCheckValue(kAdst4Multiplier[1] * dst[3], range + 12);
443   s[6] = RangeCheckValue(kAdst4Multiplier[3] * dst[3], range + 12);
444   // stage 2.
445   // Section 7.13.2.6: It is a requirement of bitstream conformance that
446   // values stored in the variable a7 by this process are representable by a
447   // signed integer using range + 1 bits of precision.
448   const int32_t a7 = RangeCheckValue(dst[0] - dst[2], range + 1);
449   // Section 7.13.2.6: It is a requirement of bitstream conformance that
450   // values stored in the variable b7 by this process are representable by a
451   // signed integer using |range| bits of precision.
452   const int32_t b7 = RangeCheckValue(a7 + dst[3], range);
453   // stage 3.
454   s[0] = RangeCheckValue(s[0] + s[3], range + 12);
455   s[1] = RangeCheckValue(s[1] - s[4], range + 12);
456   s[3] = s[2];
457   s[2] = RangeCheckValue(kAdst4Multiplier[2] * b7, range + 12);
458   // stage 4.
459   s[0] = RangeCheckValue(s[0] + s[5], range + 12);
460   s[1] = RangeCheckValue(s[1] - s[6], range + 12);
461   // stages 5 and 6.
462   const int32_t x0 = RangeCheckValue(s[0] + s[3], range + 12);
463   const int32_t x1 = RangeCheckValue(s[1] + s[3], range + 12);
464   int32_t x3 = RangeCheckValue(s[0] + s[1], range + 12);
465   x3 = RangeCheckValue(x3 - s[3], range + 12);
466   int32_t dst_0 = RightShiftWithRounding(x0, 12);
467   int32_t dst_1 = RightShiftWithRounding(x1, 12);
468   int32_t dst_2 = RightShiftWithRounding(s[2], 12);
469   int32_t dst_3 = RightShiftWithRounding(x3, 12);
470   if (sizeof(Residual) == 2) {
471     // If the first argument to RightShiftWithRounding(..., 12) is only
472     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
473     // in RightShiftWithRounding(..., 12) will cause the function to return
474     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
475     dst_0 -= (dst_0 == 0x8000);
476     dst_1 -= (dst_1 == 0x8000);
477     dst_3 -= (dst_3 == 0x8000);
478   }
479   dst[0] = dst_0;
480   dst[1] = dst_1;
481   dst[2] = dst_2;
482   dst[3] = dst_3;
483 }
484 
485 template <int bitdepth, typename Residual>
Adst4DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)486 void Adst4DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
487                    bool is_row) {
488   auto* const dst = static_cast<Residual*>(dest);
489 
490   if (is_row && should_round) {
491     dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
492   }
493 
494   // stage 1.
495   // Section 7.13.2.6: It is a requirement of bitstream conformance that all
496   // values stored in the s and x arrays by this process are representable by
497   // a signed integer using range + 12 bits of precision.
498   int32_t s[3];
499   s[0] = RangeCheckValue(kAdst4Multiplier[0] * dst[0], range + 12);
500   s[1] = RangeCheckValue(kAdst4Multiplier[1] * dst[0], range + 12);
501   s[2] = RangeCheckValue(kAdst4Multiplier[2] * dst[0], range + 12);
502   // stage 3.
503   // stage 4.
504   // stages 5 and 6.
505   int32_t dst_0 = RightShiftWithRounding(s[0], 12);
506   int32_t dst_1 = RightShiftWithRounding(s[1], 12);
507   int32_t dst_2 = RightShiftWithRounding(s[2], 12);
508   int32_t dst_3 =
509       RightShiftWithRounding(RangeCheckValue(s[0] + s[1], range + 12), 12);
510   if (sizeof(Residual) == 2) {
511     // If the first argument to RightShiftWithRounding(..., 12) is only
512     // slightly smaller than 2^27 - 1 (e.g., 0x7fffe4e), adding 2^11 to it
513     // in RightShiftWithRounding(..., 12) will cause the function to return
514     // 0x8000, which cannot be represented as an int16_t. Change it to 0x7fff.
515     dst_0 -= (dst_0 == 0x8000);
516     dst_1 -= (dst_1 == 0x8000);
517     dst_3 -= (dst_3 == 0x8000);
518   }
519   dst[0] = dst_0;
520   dst[1] = dst_1;
521   dst[2] = dst_2;
522   dst[3] = dst_3;
523 
524   const int size = 4;
525   if (is_row && row_shift > 0) {
526     for (int j = 0; j < size; ++j) {
527       dst[j] = RightShiftWithRounding(dst[j], row_shift);
528     }
529   }
530 
531   ClampIntermediate<bitdepth, Residual>(dst, 4);
532 }
533 
// Copies |n| (8 or 16) entries from |src| into |dst| in the ADST input
// order: even output positions i take src[n - 1 - i] and odd output
// positions take src[i - 1].
template <typename Residual>
void AdstInputPermutation(int32_t* const dst, const Residual* const src,
                          int n) {
  assert(n == 8 || n == 16);
  for (int out_index = 0; out_index < n; ++out_index) {
    int in_index;
    if ((out_index & 1) != 0) {
      in_index = out_index - 1;
    } else {
      in_index = n - out_index - 1;
    }
    dst[out_index] = src[in_index];
  }
}
542 
// Source index for each output position of a 16-point ADST. For the 8-point
// ADST the first eight entries are used, each halved.
constexpr int8_t kAdstOutputPermutationLookup[16] = {
    0, 8, 12, 4, 6, 14, 10, 2, 3, 11, 15, 7, 5, 13, 9, 1};

// Writes the |n| (8 or 16) ADST outputs to |dst|: position i receives the
// permuted entry of |src|, negated when i is odd.
template <typename Residual>
void AdstOutputPermutation(Residual* const dst, const int32_t* const src,
                           int n) {
  assert(n == 8 || n == 16);
  const int halve = (n == 8) ? 1 : 0;
  for (int out_index = 0; out_index < n; ++out_index) {
    const int8_t in_index = kAdstOutputPermutationLookup[out_index] >> halve;
    int32_t value;
    if ((out_index & 1) != 0) {
      value = -src[in_index];
    } else {
      value = src[in_index];
    }
    // Negating -32768 at an odd position gives 32768, which does not fit in
    // an int16_t; store 32767 in that case.
    if (sizeof(Residual) == 2 && value == 0x8000) {
      value = 0x7fff;
    }
    dst[out_index] = value;
  }
}
562 
563 template <typename Residual>
Adst8_C(void * dest,int8_t range)564 void Adst8_C(void* dest, int8_t range) {
565   auto* const dst = static_cast<Residual*>(dest);
566   // stage 1.
567   int32_t temp[8];
568   AdstInputPermutation(temp, dst, 8);
569   // stage 2.
570   for (int i = 0; i < 4; ++i) {
571     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 60 - 16 * i,
572                         true, range);
573   }
574   // stage 3.
575   for (int i = 0; i < 4; ++i) {
576     HadamardRotation_C(temp, i, i + 4, false, range);
577   }
578   // stage 4.
579   for (int i = 0; i < 2; ++i) {
580     ButterflyRotation_C(temp, i * 3 + 4, i + 5, 48 - 32 * i, true, range);
581   }
582   // stage 5.
583   for (int i = 0; i < 2; ++i) {
584     for (int j = 0; j < 2; ++j) {
585       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
586                          false, range);
587     }
588   }
589   // stage 6.
590   for (int i = 0; i < 2; ++i) {
591     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
592                         range);
593   }
594   // stage 7.
595   AdstOutputPermutation(dst, temp, 8);
596 }
597 
598 template <int bitdepth, typename Residual>
Adst8DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)599 void Adst8DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
600                    bool is_row) {
601   auto* const dst = static_cast<Residual*>(dest);
602 
603   // stage 1.
604   int32_t temp[8];
605   // After the permutation, the dc value is in temp[1]. The remaining are zero.
606   AdstInputPermutation(temp, dst, 8);
607 
608   if (is_row && should_round) {
609     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
610   }
611 
612   // stage 2.
613   ButterflyRotationFirstIsZero_C(temp, 0, 1, 60, true, range);
614 
615   // stage 3.
616   temp[4] = temp[0];
617   temp[5] = temp[1];
618 
619   // stage 4.
620   ButterflyRotation_C(temp, 4, 5, 48, true, range);
621 
622   // stage 5.
623   temp[2] = temp[0];
624   temp[3] = temp[1];
625   temp[6] = temp[4];
626   temp[7] = temp[5];
627 
628   // stage 6.
629   ButterflyRotation_C(temp, 2, 3, 32, true, range);
630   ButterflyRotation_C(temp, 6, 7, 32, true, range);
631 
632   // stage 7.
633   AdstOutputPermutation(dst, temp, 8);
634 
635   const int size = 8;
636   if (is_row && row_shift > 0) {
637     for (int j = 0; j < size; ++j) {
638       dst[j] = RightShiftWithRounding(dst[j], row_shift);
639     }
640   }
641 
642   ClampIntermediate<bitdepth, Residual>(dst, 8);
643 }
644 
645 template <typename Residual>
Adst16_C(void * dest,int8_t range)646 void Adst16_C(void* dest, int8_t range) {
647   auto* const dst = static_cast<Residual*>(dest);
648   // stage 1.
649   int32_t temp[16];
650   AdstInputPermutation(temp, dst, 16);
651   // stage 2.
652   for (int i = 0; i < 8; ++i) {
653     ButterflyRotation_C(temp, MultiplyBy2(i), MultiplyBy2(i) + 1, 62 - 8 * i,
654                         true, range);
655   }
656   // stage 3.
657   for (int i = 0; i < 8; ++i) {
658     HadamardRotation_C(temp, i, i + 8, false, range);
659   }
660   // stage 4.
661   for (int i = 0; i < 2; ++i) {
662     ButterflyRotation_C(temp, MultiplyBy2(i) + 8, MultiplyBy2(i) + 9,
663                         56 - 32 * i, true, range);
664     ButterflyRotation_C(temp, MultiplyBy2(i) + 13, MultiplyBy2(i) + 12,
665                         8 + 32 * i, true, range);
666   }
667   // stage 5.
668   for (int i = 0; i < 4; ++i) {
669     for (int j = 0; j < 2; ++j) {
670       HadamardRotation_C(temp, i + MultiplyBy8(j), i + MultiplyBy8(j) + 4,
671                          false, range);
672     }
673   }
674   // stage 6.
675   for (int i = 0; i < 2; ++i) {
676     for (int j = 0; j < 2; ++j) {
677       ButterflyRotation_C(temp, i * 3 + MultiplyBy8(j) + 4,
678                           i + MultiplyBy8(j) + 5, 48 - 32 * i, true, range);
679     }
680   }
681   // stage 7.
682   for (int i = 0; i < 2; ++i) {
683     for (int j = 0; j < 4; ++j) {
684       HadamardRotation_C(temp, i + MultiplyBy4(j), i + MultiplyBy4(j) + 2,
685                          false, range);
686     }
687   }
688   // stage 8.
689   for (int i = 0; i < 4; ++i) {
690     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
691                         range);
692   }
693   // stage 9.
694   AdstOutputPermutation(dst, temp, 16);
695 }
696 
697 template <int bitdepth, typename Residual>
Adst16DcOnly_C(void * dest,int8_t range,bool should_round,int row_shift,bool is_row)698 void Adst16DcOnly_C(void* dest, int8_t range, bool should_round, int row_shift,
699                     bool is_row) {
700   auto* const dst = static_cast<Residual*>(dest);
701 
702   // stage 1.
703   int32_t temp[16];
704   // After the permutation, the dc value is in temp[1].  The remaining are zero.
705   AdstInputPermutation(temp, dst, 16);
706 
707   if (is_row && should_round) {
708     temp[1] = RightShiftWithRounding(temp[1] * kTransformRowMultiplier, 12);
709   }
710 
711   // stage 2.
712   ButterflyRotationFirstIsZero_C(temp, 0, 1, 62, true, range);
713 
714   // stage 3.
715   temp[8] = temp[0];
716   temp[9] = temp[1];
717 
718   // stage 4.
719   ButterflyRotation_C(temp, 8, 9, 56, true, range);
720 
721   // stage 5.
722   temp[4] = temp[0];
723   temp[5] = temp[1];
724   temp[12] = temp[8];
725   temp[13] = temp[9];
726 
727   // stage 6.
728   ButterflyRotation_C(temp, 4, 5, 48, true, range);
729   ButterflyRotation_C(temp, 12, 13, 48, true, range);
730 
731   // stage 7.
732   temp[2] = temp[0];
733   temp[3] = temp[1];
734   temp[10] = temp[8];
735   temp[11] = temp[9];
736 
737   temp[6] = temp[4];
738   temp[7] = temp[5];
739   temp[14] = temp[12];
740   temp[15] = temp[13];
741 
742   // stage 8.
743   for (int i = 0; i < 4; ++i) {
744     ButterflyRotation_C(temp, MultiplyBy4(i) + 2, MultiplyBy4(i) + 3, 32, true,
745                         range);
746   }
747 
748   // stage 9.
749   AdstOutputPermutation(dst, temp, 16);
750 
751   const int size = 16;
752   if (is_row && row_shift > 0) {
753     for (int j = 0; j < size; ++j) {
754       dst[j] = RightShiftWithRounding(dst[j], row_shift);
755     }
756   }
757 
758   ClampIntermediate<bitdepth, Residual>(dst, 16);
759 }
760 
761 //------------------------------------------------------------------------------
762 // Identity Transforms.
763 //
764 // In the spec, the inverse identity transform is followed by a Round2() call:
765 //   The row transforms with i = 0..(h-1) are applied as follows:
766 //     ...
767 //     * Otherwise, invoke the inverse identity transform process specified in
768 //       section 7.13.2.15 with the input variable n equal to log2W.
769 //     * Set Residual[ i ][ j ] equal to Round2( T[ j ], rowShift )
770 //       for j = 0..(w-1).
771 //   ...
772 //   The column transforms with j = 0..(w-1) are applied as follows:
773 //     ...
774 //     * Otherwise, invoke the inverse identity transform process specified in
775 //       section 7.13.2.15 with the input variable n equal to log2H.
776 //     * Residual[ i ][ j ] is set equal to Round2( T[ i ], colShift )
777 //       for i = 0..(h-1).
778 //
779 // Therefore, we define the identity transform functions to perform both the
780 // inverse identity transform and the Round2() call. This has two advantages:
781 // 1. The outputs of the inverse identity transform do not need to be stored
782 //    in the Residual array. They can be stored in int32_t local variables,
783 //    which have a larger range if Residual is an int16_t array.
784 // 2. The inverse identity transform and the Round2() call can be jointly
785 //    optimized.
786 //
787 // The identity transform functions have the following prototype:
788 //   void Identity_C(void* dest, int8_t shift);
789 //
790 // The |shift| parameter is the amount of shift for the Round2() call. For row
791 // transforms, |shift| is 0, 1, or 2. For column transforms, |shift| is always
792 // 4. Therefore, an identity transform function can detect whether it is being
793 // invoked as a row transform or a column transform by checking whether |shift|
794 // is equal to 4.
795 //
796 // Input Range
797 //
798 // The inputs of row transforms, stored in the 2D array Dequant, are
799 // representable by a signed integer using 8 + BitDepth bits of precision:
800 //   f. Dequant[ i ][ j ] is set equal to
801 //   Clip3( - ( 1 << ( 7 + BitDepth ) ), ( 1 << ( 7 + BitDepth ) ) - 1, dq2 ).
802 //
803 // The inputs of column transforms are representable by a signed integer using
804 // Max( BitDepth + 6, 16 ) bits of precision:
805 //   Set the variable colClampRange equal to Max( BitDepth + 6, 16 ).
806 //   ...
807 //   Between the row and column transforms, Residual[ i ][ j ] is set equal to
808 //   Clip3( - ( 1 << ( colClampRange - 1 ) ),
809 //          ( 1 << (colClampRange - 1 ) ) - 1,
810 //          Residual[ i ][ j ] )
811 //   for i = 0..(h-1), for j = 0..(w-1).
812 //
813 // Output Range
814 //
815 // The outputs of row transforms are representable by a signed integer using
816 // 8 + BitDepth + 1 = 9 + BitDepth bits of precision, because the net effect
817 // of the multiplicative factor of inverse identity transforms minus the
818 // smallest row shift is an increase of at most one bit.
819 //
820 // Transform | Multiplicative factor | Smallest row | Net increase
821 // width     | (in bits)             | shift        | in bits
822 // ---------------------------------------------------------------
823 //     4     |  sqrt(2)  (0.5 bits)  |      0       |    +0.5
824 //     8     |     2     (1 bit)     |      0       |    +1
825 //    16     | 2*sqrt(2) (1.5 bits)  |      1       |    +0.5
826 //    32     |     4     (2 bits)    |      1       |    +1
827 //
828 // If BitDepth is 8 and Residual is an int16_t array, to avoid truncation we
829 // clip the outputs (which have 17 bits of precision) to the range of int16_t
830 // before storing them in the Residual array. This clipping happens to be the
831 // same as the required clipping after the row transform (see the spec quoted
832 // above), so we remain compliant with the spec. (In this case,
833 // TransformLoop_C() skips clipping the outputs of row transforms to avoid
834 // duplication of effort.)
835 //
836 // The outputs of column transforms are representable by a signed integer using
837 // Max( BitDepth + 6, 16 ) + 2 - 4 = Max( BitDepth + 4, 14 ) bits of precision,
838 // because the multiplicative factor of inverse identity transforms is at most
839 // 4 (2 bits) and |shift| is always 4.
840 
841 template <typename Residual>
Identity4Row_C(void * dest,int8_t shift)842 void Identity4Row_C(void* dest, int8_t shift) {
843   assert(shift == 0 || shift == 1);
844   auto* const dst = static_cast<Residual*>(dest);
845   // If |shift| is 0, |rounding| should be 1 << 11. If |shift| is 1, |rounding|
846   // should be (1 + (1 << 1)) << 11. The following expression works for both
847   // values of |shift|.
848   const int32_t rounding = (1 + (shift << 1)) << 11;
849   for (int i = 0; i < 4; ++i) {
850     // The intermediate value here will have to fit into an int32_t for it to be
851     // bitstream conformant. The multiplication is promoted to int32_t by
852     // defining kIdentity4Multiplier as int32_t.
853     int32_t dst_i = (dst[i] * kIdentity4Multiplier + rounding) >> (12 + shift);
854     if (sizeof(Residual) == 2) {
855       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
856     }
857     dst[i] = static_cast<Residual>(dst_i);
858   }
859 }
860 
861 template <typename Residual>
Identity4Column_C(void * dest,int8_t)862 void Identity4Column_C(void* dest, int8_t /*shift*/) {
863   auto* const dst = static_cast<Residual*>(dest);
864   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
865   for (int i = 0; i < 4; ++i) {
866     // The intermediate value here will have to fit into an int32_t for it to be
867     // bitstream conformant. The multiplication is promoted to int32_t by
868     // defining kIdentity4Multiplier as int32_t.
869     dst[i] = static_cast<Residual>((dst[i] * kIdentity4Multiplier + rounding) >>
870                                    (12 + kTransformColumnShift));
871   }
872 }
873 
874 template <int bitdepth, typename Residual>
Identity4DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)875 void Identity4DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
876                        int row_shift, bool is_row) {
877   auto* const dst = static_cast<Residual*>(dest);
878 
879   if (is_row) {
880     if (should_round) {
881       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
882     }
883 
884     const int32_t rounding = (1 + (row_shift << 1)) << 11;
885     int32_t dst_i =
886         (dst[0] * kIdentity4Multiplier + rounding) >> (12 + row_shift);
887     if (sizeof(Residual) == 2) {
888       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
889     }
890     dst[0] = static_cast<Residual>(dst_i);
891 
892     ClampIntermediate<bitdepth, Residual>(dst, 1);
893     return;
894   }
895 
896   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
897   dst[0] = static_cast<Residual>((dst[0] * kIdentity4Multiplier + rounding) >>
898                                  (12 + kTransformColumnShift));
899 }
900 
901 template <typename Residual>
Identity8Row_C(void * dest,int8_t shift)902 void Identity8Row_C(void* dest, int8_t shift) {
903   assert(shift == 0 || shift == 1 || shift == 2);
904   auto* const dst = static_cast<Residual*>(dest);
905   for (int i = 0; i < 8; ++i) {
906     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[i]), shift);
907     if (sizeof(Residual) == 2) {
908       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
909     }
910     dst[i] = static_cast<Residual>(dst_i);
911   }
912 }
913 
914 template <typename Residual>
Identity8Column_C(void * dest,int8_t)915 void Identity8Column_C(void* dest, int8_t /*shift*/) {
916   auto* const dst = static_cast<Residual*>(dest);
917   for (int i = 0; i < 8; ++i) {
918     dst[i] = static_cast<Residual>(
919         RightShiftWithRounding(dst[i], kTransformColumnShift - 1));
920   }
921 }
922 
923 template <int bitdepth, typename Residual>
Identity8DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)924 void Identity8DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
925                        int row_shift, bool is_row) {
926   auto* const dst = static_cast<Residual*>(dest);
927 
928   if (is_row) {
929     if (should_round) {
930       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
931     }
932 
933     int32_t dst_i = RightShiftWithRounding(MultiplyBy2(dst[0]), row_shift);
934     if (sizeof(Residual) == 2) {
935       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
936     }
937     dst[0] = static_cast<Residual>(dst_i);
938 
939     // If Residual is int16_t (which implies bitdepth is 8), we don't need to
940     // clip residual[i][j] to 16 bits.
941     if (sizeof(Residual) > 2) {
942       const Residual intermediate_clamp_max =
943           (1 << (std::max(bitdepth + 6, 16) - 1)) - 1;
944       const Residual intermediate_clamp_min = -intermediate_clamp_max - 1;
945       dst[0] = Clip3(dst[0], intermediate_clamp_min, intermediate_clamp_max);
946     }
947     return;
948   }
949 
950   dst[0] = static_cast<Residual>(
951       RightShiftWithRounding(dst[0], kTransformColumnShift - 1));
952 }
953 
954 template <typename Residual>
Identity16Row_C(void * dest,int8_t shift)955 void Identity16Row_C(void* dest, int8_t shift) {
956   assert(shift == 1 || shift == 2);
957   auto* const dst = static_cast<Residual*>(dest);
958   const int32_t rounding = (1 + (1 << shift)) << 11;
959   for (int i = 0; i < 16; ++i) {
960     // The intermediate value here will have to fit into an int32_t for it to be
961     // bitstream conformant. The multiplication is promoted to int32_t by
962     // defining kIdentity16Multiplier as int32_t.
963     int32_t dst_i = (dst[i] * kIdentity16Multiplier + rounding) >> (12 + shift);
964     if (sizeof(Residual) == 2) {
965       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
966     }
967     dst[i] = static_cast<Residual>(dst_i);
968   }
969 }
970 
971 template <typename Residual>
Identity16Column_C(void * dest,int8_t)972 void Identity16Column_C(void* dest, int8_t /*shift*/) {
973   auto* const dst = static_cast<Residual*>(dest);
974   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
975   for (int i = 0; i < 16; ++i) {
976     // The intermediate value here will have to fit into an int32_t for it to be
977     // bitstream conformant. The multiplication is promoted to int32_t by
978     // defining kIdentity16Multiplier as int32_t.
979     dst[i] =
980         static_cast<Residual>((dst[i] * kIdentity16Multiplier + rounding) >>
981                               (12 + kTransformColumnShift));
982   }
983 }
984 
985 template <int bitdepth, typename Residual>
Identity16DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)986 void Identity16DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
987                         int row_shift, bool is_row) {
988   auto* const dst = static_cast<Residual*>(dest);
989 
990   if (is_row) {
991     if (should_round) {
992       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
993     }
994 
995     const int32_t rounding = (1 + (1 << row_shift)) << 11;
996     int32_t dst_i =
997         (dst[0] * kIdentity16Multiplier + rounding) >> (12 + row_shift);
998     if (sizeof(Residual) == 2) {
999       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1000     }
1001     dst[0] = static_cast<Residual>(dst_i);
1002 
1003     ClampIntermediate<bitdepth, Residual>(dst, 1);
1004     return;
1005   }
1006 
1007   const int32_t rounding = (1 + (1 << kTransformColumnShift)) << 11;
1008   dst[0] = static_cast<Residual>((dst[0] * kIdentity16Multiplier + rounding) >>
1009                                  (12 + kTransformColumnShift));
1010 }
1011 
1012 template <typename Residual>
Identity32Row_C(void * dest,int8_t shift)1013 void Identity32Row_C(void* dest, int8_t shift) {
1014   assert(shift == 1 || shift == 2);
1015   auto* const dst = static_cast<Residual*>(dest);
1016   for (int i = 0; i < 32; ++i) {
1017     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[i]), shift);
1018     if (sizeof(Residual) == 2) {
1019       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1020     }
1021     dst[i] = static_cast<Residual>(dst_i);
1022   }
1023 }
1024 
1025 template <typename Residual>
Identity32Column_C(void * dest,int8_t)1026 void Identity32Column_C(void* dest, int8_t /*shift*/) {
1027   auto* const dst = static_cast<Residual*>(dest);
1028   for (int i = 0; i < 32; ++i) {
1029     dst[i] = static_cast<Residual>(
1030         RightShiftWithRounding(dst[i], kTransformColumnShift - 2));
1031   }
1032 }
1033 
1034 template <int bitdepth, typename Residual>
Identity32DcOnly_C(void * dest,int8_t,bool should_round,int row_shift,bool is_row)1035 void Identity32DcOnly_C(void* dest, int8_t /*range*/, bool should_round,
1036                         int row_shift, bool is_row) {
1037   auto* const dst = static_cast<Residual*>(dest);
1038 
1039   if (is_row) {
1040     if (should_round) {
1041       dst[0] = RightShiftWithRounding(dst[0] * kTransformRowMultiplier, 12);
1042     }
1043 
1044     int32_t dst_i = RightShiftWithRounding(MultiplyBy4(dst[0]), row_shift);
1045     if (sizeof(Residual) == 2) {
1046       dst_i = Clip3(dst_i, INT16_MIN, INT16_MAX);
1047     }
1048     dst[0] = static_cast<Residual>(dst_i);
1049 
1050     ClampIntermediate<bitdepth, Residual>(dst, 1);
1051     return;
1052   }
1053 
1054   dst[0] = static_cast<Residual>(
1055       RightShiftWithRounding(dst[0], kTransformColumnShift - 2));
1056 }
1057 
1058 //------------------------------------------------------------------------------
1059 // Walsh Hadamard Transform.
1060 
// In-place 4-point inverse Walsh-Hadamard transform on the values at |dest|.
// Each input is first shifted right by |shift|. Intermediate results are
// stored back into Residual-typed variables at every step.
template <typename Residual>
void Wht4_C(void* dest, int8_t shift) {
  auto* const coeffs = static_cast<Residual*>(dest);
  // Load the pre-shifted inputs in the permuted order the butterfly expects.
  Residual a = coeffs[0] >> shift;
  Residual c = coeffs[1] >> shift;
  Residual d = coeffs[2] >> shift;
  Residual b = coeffs[3] >> shift;
  a += c;
  d -= b;
  // This signed right shift must be an arithmetic shift.
  const Residual e = (a - d) >> 1;
  const Residual out1 = e - b;
  const Residual out2 = e - c;
  coeffs[1] = out1;
  coeffs[2] = out2;
  coeffs[0] = a - out1;
  coeffs[3] = d + out2;
}
1078 
1079 template <int bitdepth, typename Residual>
Wht4DcOnly_C(void * dest,int8_t range,bool,int,bool)1080 void Wht4DcOnly_C(void* dest, int8_t range, bool /*should_round*/,
1081                   int /*row_shift*/, bool /*is_row*/) {
1082   auto* const dst = static_cast<Residual*>(dest);
1083   const int shift = range;
1084 
1085   Residual temp = dst[0] >> shift;
1086   // This signed right shift must be an arithmetic shift.
1087   Residual e = temp >> 1;
1088   dst[0] = temp - e;
1089   dst[1] = e;
1090   dst[2] = e;
1091   dst[3] = e;
1092 
1093   ClampIntermediate<bitdepth, Residual>(dst, 4);
1094 }
1095 
1096 //------------------------------------------------------------------------------
1097 // row/column transform loop
1098 
// Signature of a 1D inverse transform that works in place on one row or
// column of values at |dst|. TransformLoop_C() passes the Round2() shift as
// the second argument for identity transforms and the clamp range otherwise.
using InverseTransform1DFunc = void (*)(void* dst, int8_t range);
// Signature of the DC-only counterpart, which TransformLoop_C() calls instead
// of the full 1D transform when adjusted_tx_height is 1. |should_round| and
// |row_shift| are supplied for the row pass; |is_row| tells the callee which
// pass it is running.
using InverseTransformDcOnlyFunc = void (*)(void* dest, int8_t range,
                                            bool should_round, int row_shift,
                                            bool is_row);
1103 
1104 template <int bitdepth, typename Residual, typename Pixel,
1105           Transform1D transform1d_type,
1106           InverseTransformDcOnlyFunc dconly_transform1d,
1107           InverseTransform1DFunc transform1d_func, bool is_row>
TransformLoop_C(TransformType tx_type,TransformSize tx_size,int adjusted_tx_height,void * src_buffer,int start_x,int start_y,void * dst_frame)1108 void TransformLoop_C(TransformType tx_type, TransformSize tx_size,
1109                      int adjusted_tx_height, void* src_buffer, int start_x,
1110                      int start_y, void* dst_frame) {
1111   constexpr bool lossless = transform1d_type == k1DTransformWht;
1112   constexpr bool is_identity = transform1d_type == k1DTransformIdentity;
1113   // The transform size of the WHT is always 4x4. Setting tx_width and
1114   // tx_height to the constant 4 for the WHT speeds the code up.
1115   assert(!lossless || tx_size == kTransformSize4x4);
1116   const int tx_width = lossless ? 4 : kTransformWidth[tx_size];
1117   const int tx_height = lossless ? 4 : kTransformHeight[tx_size];
1118   const int tx_width_log2 = kTransformWidthLog2[tx_size];
1119   const int tx_height_log2 = kTransformHeightLog2[tx_size];
1120   auto* frame = static_cast<Array2DView<Pixel>*>(dst_frame);
1121 
1122   // Initially this points to the dequantized values. After the transforms are
1123   // applied, this buffer contains the residual.
1124   Array2DView<Residual> residual(tx_height, tx_width,
1125                                  static_cast<Residual*>(src_buffer));
1126 
1127   if (is_row) {
1128     // Row transform.
1129     const uint8_t row_shift = lossless ? 0 : kTransformRowShift[tx_size];
1130     // This is the |range| parameter of the InverseTransform1DFunc.  For lossy
1131     // transforms, this will be equal to the clamping range.
1132     const int8_t row_clamp_range = lossless ? 2 : (bitdepth + 8);
1133     // If the width:height ratio of the transform size is 2:1 or 1:2, multiply
1134     // the input to the row transform by 1 / sqrt(2), which is approximated by
1135     // the fraction 2896 / 2^12.
1136     const bool should_round = std::abs(tx_width_log2 - tx_height_log2) == 1;
1137 
1138     if (adjusted_tx_height == 1) {
1139       dconly_transform1d(residual[0], row_clamp_range, should_round, row_shift,
1140                          true);
1141       return;
1142     }
1143 
1144     // Row transforms need to be done only up to 32 because the rest of the rows
1145     // are always all zero if |tx_height| is 64.  Otherwise, only process the
1146     // rows that have a non zero coefficients.
1147     for (int i = 0; i < adjusted_tx_height; ++i) {
1148       // If lossless, the transform size is 4x4, so should_round is false.
1149       if (!lossless && should_round) {
1150         // The last 32 values of every row are always zero if the |tx_width| is
1151         // 64.
1152         for (int j = 0; j < std::min(tx_width, 32); ++j) {
1153           residual[i][j] = RightShiftWithRounding(
1154               residual[i][j] * kTransformRowMultiplier, 12);
1155         }
1156       }
1157       // For identity transform, |transform1d_func| also performs the
1158       // Round2(T[j], rowShift) call in the spec.
1159       transform1d_func(residual[i], is_identity ? row_shift : row_clamp_range);
1160       if (!lossless && !is_identity && row_shift > 0) {
1161         for (int j = 0; j < tx_width; ++j) {
1162           residual[i][j] = RightShiftWithRounding(residual[i][j], row_shift);
1163         }
1164       }
1165 
1166       ClampIntermediate<bitdepth, Residual>(residual[i], tx_width);
1167     }
1168     return;
1169   }
1170 
1171   assert(!is_row);
1172   constexpr uint8_t column_shift = lossless ? 0 : kTransformColumnShift;
1173   // This is the |range| parameter of the InverseTransform1DFunc.  For lossy
1174   // transforms, this will be equal to the clamping range.
1175   const int8_t column_clamp_range = lossless ? 0 : std::max(bitdepth + 6, 16);
1176   const bool flip_rows = transform1d_type == k1DTransformAdst &&
1177                          kTransformFlipRowsMask.Contains(tx_type);
1178   const bool flip_columns =
1179       !lossless && kTransformFlipColumnsMask.Contains(tx_type);
1180   const int min_value = 0;
1181   const int max_value = (1 << bitdepth) - 1;
1182   // Note: 64 is the maximum size of a 1D transform buffer (the largest
1183   // transform size is kTransformSize64x64).
1184   Residual tx_buffer[64];
1185   for (int j = 0; j < tx_width; ++j) {
1186     const int flipped_j = flip_columns ? tx_width - j - 1 : j;
1187     int i = 0;
1188     do {
1189       tx_buffer[i] = residual[i][flipped_j];
1190     } while (++i != tx_height);
1191     if (adjusted_tx_height == 1) {
1192       dconly_transform1d(tx_buffer, column_clamp_range, false, 0, false);
1193     } else {
1194       // For identity transform, |transform1d_func| also performs the
1195       // Round2(T[i], colShift) call in the spec.
1196       transform1d_func(tx_buffer,
1197                        is_identity ? column_shift : column_clamp_range);
1198     }
1199     const int x = start_x + j;
1200     for (int i = 0; i < tx_height; ++i) {
1201       const int y = start_y + i;
1202       const int index = flip_rows ? tx_height - i - 1 : i;
1203       Residual residual_value = tx_buffer[index];
1204       if (!lossless && !is_identity) {
1205         residual_value = RightShiftWithRounding(residual_value, column_shift);
1206       }
1207       (*frame)[y][x] =
1208           Clip3((*frame)[y][x] + residual_value, min_value, max_value);
1209     }
1210   }
1211 }
1212 
1213 //------------------------------------------------------------------------------
1214 
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
// Registers the C row and column transform loops in |dsp|->inverse_transforms
// for the given |bitdepth|, |Residual| and |Pixel| types: DCT sizes 4 to 64,
// ADST sizes 4 to 16, identity sizes 4 to 32, and WHT size 4.
template <int bitdepth, typename Residual, typename Pixel>
void InitAll(Dsp* const dsp) {
  // Maximum transform size for Dct is 64. The last template argument of
  // Dct_C / DctDcOnly_C increases by one each time the size doubles.
  auto& dct = dsp->inverse_transforms[k1DTransformDct];
  dct[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/true>;
  dct[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 2>, Dct_C<Residual, 2>,
                      /*is_row=*/false>;
  dct[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/true>;
  dct[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 3>, Dct_C<Residual, 3>,
                      /*is_row=*/false>;
  dct[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/true>;
  dct[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 4>, Dct_C<Residual, 4>,
                      /*is_row=*/false>;
  dct[k1DTransformSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/true>;
  dct[k1DTransformSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 5>, Dct_C<Residual, 5>,
                      /*is_row=*/false>;
  dct[k1DTransformSize64][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/true>;
  dct[k1DTransformSize64][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformDct,
                      DctDcOnly_C<bitdepth, Residual, 6>, Dct_C<Residual, 6>,
                      /*is_row=*/false>;

  // Maximum transform size for Adst is 16.
  auto& adst = dsp->inverse_transforms[k1DTransformAdst];
  adst[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst4DcOnly_C<bitdepth, Residual>, Adst4_C<Residual>,
                      /*is_row=*/false>;
  adst[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst8DcOnly_C<bitdepth, Residual>, Adst8_C<Residual>,
                      /*is_row=*/false>;
  adst[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/true>;
  adst[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformAdst,
                      Adst16DcOnly_C<bitdepth, Residual>, Adst16_C<Residual>,
                      /*is_row=*/false>;

  // Maximum transform size for Identity transform is 32. The row and column
  // passes use different 1D functions.
  auto& identity = dsp->inverse_transforms[k1DTransformIdentity];
  identity[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity4DcOnly_C<bitdepth, Residual>,
                      Identity4Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize8][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize8][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity8DcOnly_C<bitdepth, Residual>,
                      Identity8Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize16][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize16][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity16DcOnly_C<bitdepth, Residual>,
                      Identity16Column_C<Residual>, /*is_row=*/false>;
  identity[k1DTransformSize32][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Row_C<Residual>, /*is_row=*/true>;
  identity[k1DTransformSize32][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformIdentity,
                      Identity32DcOnly_C<bitdepth, Residual>,
                      Identity32Column_C<Residual>, /*is_row=*/false>;

  // Maximum transform size for Wht is 4.
  auto& wht = dsp->inverse_transforms[k1DTransformWht];
  wht[k1DTransformSize4][kRow] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/true>;
  wht[k1DTransformSize4][kColumn] =
      TransformLoop_C<bitdepth, Residual, Pixel, k1DTransformWht,
                      Wht4DcOnly_C<bitdepth, Residual>, Wht4_C<Residual>,
                      /*is_row=*/false>;
}
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1331 
Init8bpp()1332 void Init8bpp() {
1333   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
1334   assert(dsp != nullptr);
1335   for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
1336     for (auto& inverse_transform : inverse_transform_by_size) {
1337       inverse_transform[kRow] = nullptr;
1338       inverse_transform[kColumn] = nullptr;
1339     }
1340   }
1341 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1342   InitAll<8, int16_t, uint8_t>(dsp);
1343 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1344 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformDct
1345   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
1346       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1347                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1348                       /*is_row=*/true>;
1349   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
1350       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1351                       DctDcOnly_C<8, int16_t, 2>, Dct_C<int16_t, 2>,
1352                       /*is_row=*/false>;
1353 #endif
1354 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformDct
1355   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
1356       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1357                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1358                       /*is_row=*/true>;
1359   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
1360       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1361                       DctDcOnly_C<8, int16_t, 3>, Dct_C<int16_t, 3>,
1362                       /*is_row=*/false>;
1363 #endif
1364 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformDct
1365   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
1366       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1367                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1368                       /*is_row=*/true>;
1369   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
1370       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1371                       DctDcOnly_C<8, int16_t, 4>, Dct_C<int16_t, 4>,
1372                       /*is_row=*/false>;
1373 #endif
1374 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformDct
1375   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
1376       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1377                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1378                       /*is_row=*/true>;
1379   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
1380       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1381                       DctDcOnly_C<8, int16_t, 5>, Dct_C<int16_t, 5>,
1382                       /*is_row=*/false>;
1383 #endif
1384 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize64_1DTransformDct
1385   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
1386       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1387                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1388                       /*is_row=*/true>;
1389   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
1390       TransformLoop_C<8, int16_t, uint8_t, k1DTransformDct,
1391                       DctDcOnly_C<8, int16_t, 6>, Dct_C<int16_t, 6>,
1392                       /*is_row=*/false>;
1393 #endif
1394 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformAdst
1395   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
1396       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1397                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1398                       /*is_row=*/true>;
1399   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
1400       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1401                       Adst4DcOnly_C<8, int16_t>, Adst4_C<int16_t>,
1402                       /*is_row=*/false>;
1403 #endif
1404 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformAdst
1405   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
1406       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1407                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1408                       /*is_row=*/true>;
1409   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
1410       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1411                       Adst8DcOnly_C<8, int16_t>, Adst8_C<int16_t>,
1412                       /*is_row=*/false>;
1413 #endif
1414 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformAdst
1415   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
1416       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1417                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1418                       /*is_row=*/true>;
1419   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
1420       TransformLoop_C<8, int16_t, uint8_t, k1DTransformAdst,
1421                       Adst16DcOnly_C<8, int16_t>, Adst16_C<int16_t>,
1422                       /*is_row=*/false>;
1423 #endif
1424 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformIdentity
1425   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
1426       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1427                       Identity4DcOnly_C<8, int16_t>, Identity4Row_C<int16_t>,
1428                       /*is_row=*/true>;
1429   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
1430       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1431                       Identity4DcOnly_C<8, int16_t>, Identity4Column_C<int16_t>,
1432                       /*is_row=*/false>;
1433 #endif
1434 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize8_1DTransformIdentity
1435   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
1436       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1437                       Identity8DcOnly_C<8, int16_t>, Identity8Row_C<int16_t>,
1438                       /*is_row=*/true>;
1439   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
1440       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1441                       Identity8DcOnly_C<8, int16_t>, Identity8Column_C<int16_t>,
1442                       /*is_row=*/false>;
1443 #endif
1444 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize16_1DTransformIdentity
1445   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
1446       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1447                       Identity16DcOnly_C<8, int16_t>, Identity16Row_C<int16_t>,
1448                       /*is_row=*/true>;
1449   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
1450       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1451                       Identity16DcOnly_C<8, int16_t>,
1452                       Identity16Column_C<int16_t>, /*is_row=*/false>;
1453 #endif
1454 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize32_1DTransformIdentity
1455   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
1456       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1457                       Identity32DcOnly_C<8, int16_t>, Identity32Row_C<int16_t>,
1458                       /*is_row=*/true>;
1459   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
1460       TransformLoop_C<8, int16_t, uint8_t, k1DTransformIdentity,
1461                       Identity32DcOnly_C<8, int16_t>,
1462                       Identity32Column_C<int16_t>, /*is_row=*/false>;
1463 #endif
1464 #ifndef LIBGAV1_Dsp8bpp_1DTransformSize4_1DTransformWht
1465   dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
1466       TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
1467                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1468                       /*is_row=*/true>;
1469   dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
1470       TransformLoop_C<8, int16_t, uint8_t, k1DTransformWht,
1471                       Wht4DcOnly_C<8, int16_t>, Wht4_C<int16_t>,
1472                       /*is_row=*/false>;
1473 #endif
1474 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1475 }
1476 
1477 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()1478 void Init10bpp() {
1479   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
1480   assert(dsp != nullptr);
1481   for (auto& inverse_transform_by_size : dsp->inverse_transforms) {
1482     for (auto& inverse_transform : inverse_transform_by_size) {
1483       inverse_transform[kRow] = nullptr;
1484       inverse_transform[kColumn] = nullptr;
1485     }
1486   }
1487 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1488   InitAll<10, int32_t, uint16_t>(dsp);
1489 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1490 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformDct
1491   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kRow] =
1492       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1493                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1494                       /*is_row=*/true>;
1495   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize4][kColumn] =
1496       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1497                       DctDcOnly_C<10, int32_t, 2>, Dct_C<int32_t, 2>,
1498                       /*is_row=*/false>;
1499 #endif
1500 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformDct
1501   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kRow] =
1502       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1503                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1504                       /*is_row=*/true>;
1505   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize8][kColumn] =
1506       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1507                       DctDcOnly_C<10, int32_t, 3>, Dct_C<int32_t, 3>,
1508                       /*is_row=*/false>;
1509 #endif
1510 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformDct
1511   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kRow] =
1512       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1513                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1514                       /*is_row=*/true>;
1515   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize16][kColumn] =
1516       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1517                       DctDcOnly_C<10, int32_t, 4>, Dct_C<int32_t, 4>,
1518                       /*is_row=*/false>;
1519 #endif
1520 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformDct
1521   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kRow] =
1522       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1523                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1524                       /*is_row=*/true>;
1525   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize32][kColumn] =
1526       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1527                       DctDcOnly_C<10, int32_t, 5>, Dct_C<int32_t, 5>,
1528                       /*is_row=*/false>;
1529 #endif
1530 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize64_1DTransformDct
1531   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kRow] =
1532       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1533                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1534                       /*is_row=*/true>;
1535   dsp->inverse_transforms[k1DTransformDct][k1DTransformSize64][kColumn] =
1536       TransformLoop_C<10, int32_t, uint16_t, k1DTransformDct,
1537                       DctDcOnly_C<10, int32_t, 6>, Dct_C<int32_t, 6>,
1538                       /*is_row=*/false>;
1539 #endif
1540 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformAdst
1541   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kRow] =
1542       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1543                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1544                       /*is_row=*/true>;
1545   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize4][kColumn] =
1546       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1547                       Adst4DcOnly_C<10, int32_t>, Adst4_C<int32_t>,
1548                       /*is_row=*/false>;
1549 #endif
1550 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformAdst
1551   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kRow] =
1552       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1553                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1554                       /*is_row=*/true>;
1555   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize8][kColumn] =
1556       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1557                       Adst8DcOnly_C<10, int32_t>, Adst8_C<int32_t>,
1558                       /*is_row=*/false>;
1559 #endif
1560 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformAdst
1561   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kRow] =
1562       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1563                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1564                       /*is_row=*/true>;
1565   dsp->inverse_transforms[k1DTransformAdst][k1DTransformSize16][kColumn] =
1566       TransformLoop_C<10, int32_t, uint16_t, k1DTransformAdst,
1567                       Adst16DcOnly_C<10, int32_t>, Adst16_C<int32_t>,
1568                       /*is_row=*/false>;
1569 #endif
1570 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformIdentity
1571   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kRow] =
1572       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1573                       Identity4DcOnly_C<10, int32_t>, Identity4Row_C<int32_t>,
1574                       /*is_row=*/true>;
1575   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize4][kColumn] =
1576       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1577                       Identity4DcOnly_C<10, int32_t>,
1578                       Identity4Column_C<int32_t>, /*is_row=*/false>;
1579 #endif
1580 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize8_1DTransformIdentity
1581   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kRow] =
1582       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1583                       Identity8DcOnly_C<10, int32_t>, Identity8Row_C<int32_t>,
1584                       /*is_row=*/true>;
1585   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize8][kColumn] =
1586       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1587                       Identity8DcOnly_C<10, int32_t>,
1588                       Identity8Column_C<int32_t>, /*is_row=*/false>;
1589 #endif
1590 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize16_1DTransformIdentity
1591   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kRow] =
1592       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1593                       Identity16DcOnly_C<10, int32_t>, Identity16Row_C<int32_t>,
1594                       /*is_row=*/true>;
1595   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize16][kColumn] =
1596       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1597                       Identity16DcOnly_C<10, int32_t>,
1598                       Identity16Column_C<int32_t>, /*is_row=*/false>;
1599 #endif
1600 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize32_1DTransformIdentity
1601   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kRow] =
1602       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1603                       Identity32DcOnly_C<10, int32_t>, Identity32Row_C<int32_t>,
1604                       /*is_row=*/true>;
1605   dsp->inverse_transforms[k1DTransformIdentity][k1DTransformSize32][kColumn] =
1606       TransformLoop_C<10, int32_t, uint16_t, k1DTransformIdentity,
1607                       Identity32DcOnly_C<10, int32_t>,
1608                       Identity32Column_C<int32_t>, /*is_row=*/false>;
1609 #endif
1610 #ifndef LIBGAV1_Dsp10bpp_1DTransformSize4_1DTransformWht
1611   dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kRow] =
1612       TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
1613                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1614                       /*is_row=*/true>;
1615   dsp->inverse_transforms[k1DTransformWht][k1DTransformSize4][kColumn] =
1616       TransformLoop_C<10, int32_t, uint16_t, k1DTransformWht,
1617                       Wht4DcOnly_C<10, int32_t>, Wht4_C<int32_t>,
1618                       /*is_row=*/false>;
1619 #endif
1620 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
1621 }
1622 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
1623 
1624 }  // namespace
1625 
InverseTransformInit_C()1626 void InverseTransformInit_C() {
1627   Init8bpp();
1628 #if LIBGAV1_MAX_BITDEPTH >= 10
1629   Init10bpp();
1630 #endif
1631 
1632   // Local functions that may be unused depending on the optimizations
1633   // available.
1634   static_cast<void>(RangeCheckValue);
1635   static_cast<void>(kBitReverseLookup);
1636 }
1637 
1638 }  // namespace dsp
1639 }  // namespace libgav1
1640