• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_scan.h"
26 #include "vpx/vpx_codec.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/mem.h"
29 
30 using libvpx_test::ACMRandom;
31 
32 namespace {
33 
34 const int kNumCoeffs = 64;
35 const double kPi = 3.141592653589793238462643383279502884;
36 
37 const int kSignBiasMaxDiff255 = 1500;
38 const int kSignBiasMaxDiff15 = 10000;
39 
40 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
41 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
42 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
43                         int tx_type);
44 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
45                         int tx_type);
46 
47 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
48 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
49 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
50 
reference_8x8_dct_1d(const double in[8],double out[8])51 void reference_8x8_dct_1d(const double in[8], double out[8]) {
52   const double kInvSqrt2 = 0.707106781186547524400844362104;
53   for (int k = 0; k < 8; k++) {
54     out[k] = 0.0;
55     for (int n = 0; n < 8; n++) {
56       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
57     }
58     if (k == 0) out[k] = out[k] * kInvSqrt2;
59   }
60 }
61 
// Double-precision 2-D reference DCT of an 8x8 block stored row-major in
// |input|. Columns are transformed first, then rows; the row pass multiplies
// every result by 2 so the output scale matches what the accuracy checks in
// this file compare against.
void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
                          double output[kNumCoeffs]) {
  double line_in[8];
  double line_out[8];

  // Pass 1: column transforms, input -> output.
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
  }

  // Pass 2: row transforms in place, with the final scaling by 2.
  for (int row = 0; row < 8; ++row) {
    double *const row_ptr = output + row * 8;
    for (int col = 0; col < 8; ++col) line_in[col] = row_ptr[col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int col = 0; col < 8; ++col) row_ptr[col] = line_out[col] * 2;
  }
}
80 
fdct8x8_ref(const int16_t * in,tran_low_t * out,int stride,int)81 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
82                  int /*tx_type*/) {
83   vpx_fdct8x8_c(in, out, stride);
84 }
85 
fht8x8_ref(const int16_t * in,tran_low_t * out,int stride,int tx_type)86 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
87   vp9_fht8x8_c(in, out, stride, tx_type);
88 }
89 
90 #if CONFIG_VP9_HIGHBITDEPTH
idct8x8_10(const tran_low_t * in,uint8_t * out,int stride)91 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
92   vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
93 }
94 
idct8x8_12(const tran_low_t * in,uint8_t * out,int stride)95 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
96   vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
97 }
98 
iht8x8_10(const tran_low_t * in,uint8_t * out,int stride,int tx_type)99 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
100   vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
101 }
102 
iht8x8_12(const tran_low_t * in,uint8_t * out,int stride,int tx_type)103 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
104   vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
105 }
106 
107 #if HAVE_SSE2
108 
idct8x8_12_add_10_c(const tran_low_t * in,uint8_t * out,int stride)109 void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
110   vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
111 }
112 
idct8x8_12_add_12_c(const tran_low_t * in,uint8_t * out,int stride)113 void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
114   vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
115 }
116 
idct8x8_12_add_10_sse2(const tran_low_t * in,uint8_t * out,int stride)117 void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
118   vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
119 }
120 
idct8x8_12_add_12_sse2(const tran_low_t * in,uint8_t * out,int stride)121 void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
122   vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
123 }
124 
idct8x8_64_add_10_sse2(const tran_low_t * in,uint8_t * out,int stride)125 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
126   vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
127 }
128 
idct8x8_64_add_12_sse2(const tran_low_t * in,uint8_t * out,int stride)129 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
130   vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
131 }
132 #endif  // HAVE_SSE2
133 #endif  // CONFIG_VP9_HIGHBITDEPTH
134 
135 // Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
136 // produces invalid code in RunExtremalCheck() and RunInvAccuracyCheck().
137 // See:
138 // https://developercommunity.visualstudio.com/t/1770-preview-1:-Misoptimization-for-AR/10369786
139 // TODO(jzern): check the compiler version after a fix for the issue is
140 // released.
141 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
142 #pragma optimize("", off)
143 #endif
144 class FwdTrans8x8TestBase {
145  public:
146   virtual ~FwdTrans8x8TestBase() = default;
147 
148  protected:
149   virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
150   virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
151 
RunSignBiasCheck()152   void RunSignBiasCheck() {
153     ACMRandom rnd(ACMRandom::DeterministicSeed());
154     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
155     DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
156     int count_sign_block[64][2];
157     const int count_test_block = 100000;
158 
159     memset(count_sign_block, 0, sizeof(count_sign_block));
160 
161     for (int i = 0; i < count_test_block; ++i) {
162       // Initialize a test block with input range [-255, 255].
163       for (int j = 0; j < 64; ++j) {
164         test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
165                               ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
166       }
167       ASM_REGISTER_STATE_CHECK(
168           RunFwdTxfm(test_input_block, test_output_block, pitch_));
169 
170       for (int j = 0; j < 64; ++j) {
171         if (test_output_block[j] < 0) {
172           ++count_sign_block[j][0];
173         } else if (test_output_block[j] > 0) {
174           ++count_sign_block[j][1];
175         }
176       }
177     }
178 
179     for (int j = 0; j < 64; ++j) {
180       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
181       const int max_diff = kSignBiasMaxDiff255;
182       ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
183           << "Error: 8x8 FDCT/FHT has a sign bias > "
184           << 1. * max_diff / count_test_block * 100 << "%"
185           << " for input range [-255, 255] at index " << j
186           << " count0: " << count_sign_block[j][0]
187           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
188     }
189 
190     memset(count_sign_block, 0, sizeof(count_sign_block));
191 
192     for (int i = 0; i < count_test_block; ++i) {
193       // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
194       for (int j = 0; j < 64; ++j) {
195         test_input_block[j] =
196             ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
197       }
198       ASM_REGISTER_STATE_CHECK(
199           RunFwdTxfm(test_input_block, test_output_block, pitch_));
200 
201       for (int j = 0; j < 64; ++j) {
202         if (test_output_block[j] < 0) {
203           ++count_sign_block[j][0];
204         } else if (test_output_block[j] > 0) {
205           ++count_sign_block[j][1];
206         }
207       }
208     }
209 
210     for (int j = 0; j < 64; ++j) {
211       const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
212       const int max_diff = kSignBiasMaxDiff15;
213       ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
214           << "Error: 8x8 FDCT/FHT has a sign bias > "
215           << 1. * max_diff / count_test_block * 100 << "%"
216           << " for input range [-15, 15] at index " << j
217           << " count0: " << count_sign_block[j][0]
218           << " count1: " << count_sign_block[j][1] << " diff: " << diff;
219     }
220   }
221 
RunRoundTripErrorCheck()222   void RunRoundTripErrorCheck() {
223     ACMRandom rnd(ACMRandom::DeterministicSeed());
224     int max_error = 0;
225     int total_error = 0;
226     const int count_test_block = 100000;
227     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
228     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
229     DECLARE_ALIGNED(16, uint8_t, dst[64]);
230     DECLARE_ALIGNED(16, uint8_t, src[64]);
231 #if CONFIG_VP9_HIGHBITDEPTH
232     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
233     DECLARE_ALIGNED(16, uint16_t, src16[64]);
234 #endif
235 
236     for (int i = 0; i < count_test_block; ++i) {
237       // Initialize a test block with input range [-mask_, mask_].
238       for (int j = 0; j < 64; ++j) {
239         if (bit_depth_ == VPX_BITS_8) {
240           src[j] = rnd.Rand8();
241           dst[j] = rnd.Rand8();
242           test_input_block[j] = src[j] - dst[j];
243 #if CONFIG_VP9_HIGHBITDEPTH
244         } else {
245           src16[j] = rnd.Rand16() & mask_;
246           dst16[j] = rnd.Rand16() & mask_;
247           test_input_block[j] = src16[j] - dst16[j];
248 #endif
249         }
250       }
251 
252       ASM_REGISTER_STATE_CHECK(
253           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
254       for (int j = 0; j < 64; ++j) {
255         if (test_temp_block[j] > 0) {
256           test_temp_block[j] += 2;
257           test_temp_block[j] /= 4;
258           test_temp_block[j] *= 4;
259         } else {
260           test_temp_block[j] -= 2;
261           test_temp_block[j] /= 4;
262           test_temp_block[j] *= 4;
263         }
264       }
265       if (bit_depth_ == VPX_BITS_8) {
266         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
267 #if CONFIG_VP9_HIGHBITDEPTH
268       } else {
269         ASM_REGISTER_STATE_CHECK(
270             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
271 #endif
272       }
273 
274       for (int j = 0; j < 64; ++j) {
275 #if CONFIG_VP9_HIGHBITDEPTH
276         const int diff =
277             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
278 #else
279         const int diff = dst[j] - src[j];
280 #endif
281         const int error = diff * diff;
282         if (max_error < error) max_error = error;
283         total_error += error;
284       }
285     }
286 
287     ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
288         << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
289         << " roundtrip error > 1";
290 
291     ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
292         << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
293         << "error > 1/5 per block";
294   }
295 
RunExtremalCheck()296   void RunExtremalCheck() {
297     ACMRandom rnd(ACMRandom::DeterministicSeed());
298     int max_error = 0;
299     int total_error = 0;
300     int total_coeff_error = 0;
301     const int count_test_block = 100000;
302     DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
303     DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
304     DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
305     DECLARE_ALIGNED(16, uint8_t, dst[64]);
306     DECLARE_ALIGNED(16, uint8_t, src[64]);
307 #if CONFIG_VP9_HIGHBITDEPTH
308     DECLARE_ALIGNED(16, uint16_t, dst16[64]);
309     DECLARE_ALIGNED(16, uint16_t, src16[64]);
310 #endif
311 
312     for (int i = 0; i < count_test_block; ++i) {
313       // Initialize a test block with input range [-mask_, mask_].
314       for (int j = 0; j < 64; ++j) {
315         if (bit_depth_ == VPX_BITS_8) {
316           if (i == 0) {
317             src[j] = 255;
318             dst[j] = 0;
319           } else if (i == 1) {
320             src[j] = 0;
321             dst[j] = 255;
322           } else {
323             src[j] = rnd.Rand8() % 2 ? 255 : 0;
324             dst[j] = rnd.Rand8() % 2 ? 255 : 0;
325           }
326           test_input_block[j] = src[j] - dst[j];
327 #if CONFIG_VP9_HIGHBITDEPTH
328         } else {
329           if (i == 0) {
330             src16[j] = mask_;
331             dst16[j] = 0;
332           } else if (i == 1) {
333             src16[j] = 0;
334             dst16[j] = mask_;
335           } else {
336             src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
337             dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
338           }
339           test_input_block[j] = src16[j] - dst16[j];
340 #endif
341         }
342       }
343 
344       ASM_REGISTER_STATE_CHECK(
345           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
346       ASM_REGISTER_STATE_CHECK(
347           fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
348       if (bit_depth_ == VPX_BITS_8) {
349         ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
350 #if CONFIG_VP9_HIGHBITDEPTH
351       } else {
352         ASM_REGISTER_STATE_CHECK(
353             RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
354 #endif
355       }
356 
357       for (int j = 0; j < 64; ++j) {
358 #if CONFIG_VP9_HIGHBITDEPTH
359         const int diff =
360             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
361 #else
362         const int diff = dst[j] - src[j];
363 #endif
364         const int error = diff * diff;
365         if (max_error < error) max_error = error;
366         total_error += error;
367 
368         const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
369         total_coeff_error += abs(coeff_diff);
370       }
371 
372       ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
373           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
374           << " an individual roundtrip error > 1";
375 
376       ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
377           << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
378           << " roundtrip error > 1/5 per block";
379 
380       ASSERT_EQ(0, total_coeff_error)
381           << "Error: Extremal 8x8 FDCT/FHT has"
382           << " overflow issues in the intermediate steps > 1";
383     }
384   }
385 
RunInvAccuracyCheck()386   void RunInvAccuracyCheck() {
387     ACMRandom rnd(ACMRandom::DeterministicSeed());
388     const int count_test_block = 1000;
389     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
390     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
391     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
392     DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
393 #if CONFIG_VP9_HIGHBITDEPTH
394     DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
395     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
396 #endif
397 
398     for (int i = 0; i < count_test_block; ++i) {
399       double out_r[kNumCoeffs];
400 
401       // Initialize a test block with input range [-255, 255].
402       for (int j = 0; j < kNumCoeffs; ++j) {
403         if (bit_depth_ == VPX_BITS_8) {
404           src[j] = rnd.Rand8() % 2 ? 255 : 0;
405           dst[j] = src[j] > 0 ? 0 : 255;
406           in[j] = src[j] - dst[j];
407 #if CONFIG_VP9_HIGHBITDEPTH
408         } else {
409           src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
410           dst16[j] = src16[j] > 0 ? 0 : mask_;
411           in[j] = src16[j] - dst16[j];
412 #endif
413         }
414       }
415 
416       reference_8x8_dct_2d(in, out_r);
417       for (int j = 0; j < kNumCoeffs; ++j) {
418         coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
419       }
420 
421       if (bit_depth_ == VPX_BITS_8) {
422         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
423 #if CONFIG_VP9_HIGHBITDEPTH
424       } else {
425         ASM_REGISTER_STATE_CHECK(
426             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
427 #endif
428       }
429 
430       for (int j = 0; j < kNumCoeffs; ++j) {
431 #if CONFIG_VP9_HIGHBITDEPTH
432         const int diff =
433             bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
434 #else
435         const int diff = dst[j] - src[j];
436 #endif
437         const uint32_t error = diff * diff;
438         ASSERT_GE(1u << 2 * (bit_depth_ - 8), error)
439             << "Error: 8x8 IDCT has error " << error << " at index " << j;
440       }
441     }
442   }
443 
RunFwdAccuracyCheck()444   void RunFwdAccuracyCheck() {
445     ACMRandom rnd(ACMRandom::DeterministicSeed());
446     const int count_test_block = 1000;
447     DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
448     DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
449     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
450 
451     for (int i = 0; i < count_test_block; ++i) {
452       double out_r[kNumCoeffs];
453 
454       // Initialize a test block with input range [-mask_, mask_].
455       for (int j = 0; j < kNumCoeffs; ++j) {
456         in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
457       }
458 
459       RunFwdTxfm(in, coeff, pitch_);
460       reference_8x8_dct_2d(in, out_r);
461       for (int j = 0; j < kNumCoeffs; ++j) {
462         coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
463       }
464 
465       for (int j = 0; j < kNumCoeffs; ++j) {
466         const int32_t diff = coeff[j] - coeff_r[j];
467         const uint32_t error = diff * diff;
468         ASSERT_GE(9u << 2 * (bit_depth_ - 8), error)
469             << "Error: 8x8 DCT has error " << error << " at index " << j;
470       }
471     }
472   }
473 
CompareInvReference(IdctFunc ref_txfm,int thresh)474   void CompareInvReference(IdctFunc ref_txfm, int thresh) {
475     ACMRandom rnd(ACMRandom::DeterministicSeed());
476     const int count_test_block = 10000;
477     const int eob = 12;
478     DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
479     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
480     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
481 #if CONFIG_VP9_HIGHBITDEPTH
482     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
483     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
484 #endif
485     const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
486 
487     for (int i = 0; i < count_test_block; ++i) {
488       for (int j = 0; j < kNumCoeffs; ++j) {
489         if (j < eob) {
490           // Random values less than the threshold, either positive or negative
491           coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
492         } else {
493           coeff[scan[j]] = 0;
494         }
495         if (bit_depth_ == VPX_BITS_8) {
496           dst[j] = 0;
497           ref[j] = 0;
498 #if CONFIG_VP9_HIGHBITDEPTH
499         } else {
500           dst16[j] = 0;
501           ref16[j] = 0;
502 #endif
503         }
504       }
505       if (bit_depth_ == VPX_BITS_8) {
506         ref_txfm(coeff, ref, pitch_);
507         ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
508 #if CONFIG_VP9_HIGHBITDEPTH
509       } else {
510         ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
511         ASM_REGISTER_STATE_CHECK(
512             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
513 #endif
514       }
515 
516       for (int j = 0; j < kNumCoeffs; ++j) {
517 #if CONFIG_VP9_HIGHBITDEPTH
518         const int diff =
519             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
520 #else
521         const int diff = dst[j] - ref[j];
522 #endif
523         const uint32_t error = diff * diff;
524         ASSERT_EQ(0u, error)
525             << "Error: 8x8 IDCT has error " << error << " at index " << j;
526       }
527     }
528   }
529   int pitch_;
530   int tx_type_;
531   FhtFunc fwd_txfm_ref;
532   vpx_bit_depth_t bit_depth_;
533   int mask_;
534 };
535 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
536 #pragma optimize("", on)
537 #endif
538 
539 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
540                        public ::testing::TestWithParam<Dct8x8Param> {
541  public:
542   ~FwdTrans8x8DCT() override = default;
543 
SetUp()544   void SetUp() override {
545     fwd_txfm_ = GET_PARAM(0);
546     inv_txfm_ = GET_PARAM(1);
547     tx_type_ = GET_PARAM(2);
548     pitch_ = 8;
549     fwd_txfm_ref = fdct8x8_ref;
550     bit_depth_ = GET_PARAM(3);
551     mask_ = (1 << bit_depth_) - 1;
552   }
553 
TearDown()554   void TearDown() override { libvpx_test::ClearSystemState(); }
555 
556  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)557   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
558     fwd_txfm_(in, out, stride);
559   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)560   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
561     inv_txfm_(out, dst, stride);
562   }
563 
564   FdctFunc fwd_txfm_;
565   IdctFunc inv_txfm_;
566 };
567 
// Each test runs one shared base-class check for every FwdTrans8x8DCT
// parameter set.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
  RunSignBiasCheck();
}

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
  RunRoundTripErrorCheck();
}

TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
  RunExtremalCheck();
}

TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) {
  RunFwdAccuracyCheck();
}

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) {
  RunInvAccuracyCheck();
}
577 
578 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
579                       public ::testing::TestWithParam<Ht8x8Param> {
580  public:
581   ~FwdTrans8x8HT() override = default;
582 
SetUp()583   void SetUp() override {
584     fwd_txfm_ = GET_PARAM(0);
585     inv_txfm_ = GET_PARAM(1);
586     tx_type_ = GET_PARAM(2);
587     pitch_ = 8;
588     fwd_txfm_ref = fht8x8_ref;
589     bit_depth_ = GET_PARAM(3);
590     mask_ = (1 << bit_depth_) - 1;
591   }
592 
TearDown()593   void TearDown() override { libvpx_test::ClearSystemState(); }
594 
595  protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)596   void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
597     fwd_txfm_(in, out, stride, tx_type_);
598   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)599   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
600     inv_txfm_(out, dst, stride, tx_type_);
601   }
602 
603   FhtFunc fwd_txfm_;
604   IhtFunc inv_txfm_;
605 };
606 
// Each test runs one shared base-class check for every FwdTrans8x8HT
// parameter set.
TEST_P(FwdTrans8x8HT, SignBiasCheck) {
  RunSignBiasCheck();
}

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
  RunRoundTripErrorCheck();
}

TEST_P(FwdTrans8x8HT, ExtremalCheck) {
  RunExtremalCheck();
}
612 
613 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
614 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
615                        public ::testing::TestWithParam<Idct8x8Param> {
616  public:
617   ~InvTrans8x8DCT() override = default;
618 
SetUp()619   void SetUp() override {
620     ref_txfm_ = GET_PARAM(0);
621     inv_txfm_ = GET_PARAM(1);
622     thresh_ = GET_PARAM(2);
623     pitch_ = 8;
624     bit_depth_ = GET_PARAM(3);
625     mask_ = (1 << bit_depth_) - 1;
626   }
627 
TearDown()628   void TearDown() override { libvpx_test::ClearSystemState(); }
629 
630  protected:
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)631   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
632     inv_txfm_(out, dst, stride);
633   }
RunFwdTxfm(int16_t *,tran_low_t *,int)634   void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/,
635                   int /*stride*/) override {}
636 
637   IdctFunc ref_txfm_;
638   IdctFunc inv_txfm_;
639   int thresh_;
640 };
641 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans8x8DCT);
642 
TEST_P(InvTrans8x8DCT,CompareReference)643 TEST_P(InvTrans8x8DCT, CompareReference) {
644   CompareInvReference(ref_txfm_, thresh_);
645 }
646 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
647 
648 using std::make_tuple;
649 
650 #if CONFIG_VP9_HIGHBITDEPTH
651 INSTANTIATE_TEST_SUITE_P(
652     C, FwdTrans8x8DCT,
653     ::testing::Values(
654         make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
655         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
656         make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
657 #else
658 INSTANTIATE_TEST_SUITE_P(C, FwdTrans8x8DCT,
659                          ::testing::Values(make_tuple(&vpx_fdct8x8_c,
660                                                       &vpx_idct8x8_64_add_c, 0,
661                                                       VPX_BITS_8)));
662 #endif  // CONFIG_VP9_HIGHBITDEPTH
663 
664 #if CONFIG_VP9_HIGHBITDEPTH
665 INSTANTIATE_TEST_SUITE_P(
666     C, FwdTrans8x8HT,
667     ::testing::Values(
668         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
669         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
670         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
671         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
672         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
673         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
674         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
675         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
676         make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
677         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
678         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
679         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
680 #else
681 INSTANTIATE_TEST_SUITE_P(
682     C, FwdTrans8x8HT,
683     ::testing::Values(
684         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
685         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
686         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
687         make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
688 #endif  // CONFIG_VP9_HIGHBITDEPTH
689 
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
// NEON forward and inverse DCT, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon,
                                 0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
// NEON inverse hybrid transform paired with the C forward transform, for
// tx_type 0..3.
INSTANTIATE_TEST_SUITE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
706 
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 forward and inverse DCT, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2,
                                 0, VPX_BITS_8)));

// SSE2 forward and inverse hybrid transform, 8-bit, tx_type 0..3.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
720 
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// SSE2 DCT coverage in high-bit-depth builds. The third tuple element is the
// tx_type. The FwdTrans8x8DCT reference (fdct8x8_ref) ignores it, so 0 is
// used here like in every other FwdTrans8x8DCT instantiation.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 0, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 0, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 0, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 0, VPX_BITS_12)));

// SSE2 forward hybrid transform paired with the C inverse, 8-bit,
// tx_type 0..3.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
// Tuple order: reference inverse, SSE2 inverse under test, threshold, bit
// depth.
INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
755 
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
// SSSE3 forward DCT paired with the SSE2 inverse, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2,
                                 0, VPX_BITS_8)));
#endif
763 
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// MSA forward and inverse DCT, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    MSA, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0,
                                 VPX_BITS_8)));

// MSA forward and inverse hybrid transform, 8-bit, tx_type 0..3.
INSTANTIATE_TEST_SUITE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
777 
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// VSX inverse DCT paired with the C forward DCT, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    VSX, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_vsx, 0,
                                 VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
784 
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// LSX forward DCT paired with the C inverse DCT, 8-bit.
INSTANTIATE_TEST_SUITE_P(
    LSX, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_lsx, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
791 }  // namespace
792