• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_config.h"
20 #include "./vpx_dsp_rtcd.h"
21 #include "test/acm_random.h"
22 #include "test/bench.h"
23 #include "test/clear_system_state.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
26 #include "vp9/common/vp9_entropy.h"
27 #include "vp9/common/vp9_scan.h"
28 #include "vpx/vpx_codec.h"
29 #include "vpx/vpx_integer.h"
30 #include "vpx_ports/mem.h"
31 #include "vpx_ports/msvc.h"  // for round()
32 #include "vpx_ports/vpx_timer.h"
33 
34 using libvpx_test::ACMRandom;
35 
36 namespace {
37 
// Number of coefficients in one 32x32 transform block.
constexpr int kNumCoeffs = 1024;
// Pi, spelled out beyond double precision so the literal rounds correctly.
constexpr double kPi = 3.141592653589793238462643383279502884;
reference_32x32_dct_1d(const double in[32],double out[32])40 void reference_32x32_dct_1d(const double in[32], double out[32]) {
41   const double kInvSqrt2 = 0.707106781186547524400844362104;
42   for (int k = 0; k < 32; k++) {
43     out[k] = 0.0;
44     for (int n = 0; n < 32; n++) {
45       out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
46     }
47     if (k == 0) out[k] = out[k] * kInvSqrt2;
48   }
49 }
50 
// Double-precision reference for the 2-D 32x32 forward DCT. The 1-D reference
// transform is applied to each column of `input`, then to each row of the
// intermediate result; the final values are divided by 4.
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
                            double output[kNumCoeffs]) {
  double line_in[32], line_out[32];

  // Pass 1: columns. Element (row, col) lives at index row * 32 + col.
  for (int col = 0; col < 32; ++col) {
    for (int row = 0; row < 32; ++row) line_in[row] = input[row * 32 + col];
    reference_32x32_dct_1d(line_in, line_out);
    for (int row = 0; row < 32; ++row) output[row * 32 + col] = line_out[row];
  }

  // Pass 2: rows of the column-transformed data, updated in place.
  for (int row = 0; row < 32; ++row) {
    double *const row_ptr = output + row * 32;
    for (int col = 0; col < 32; ++col) line_in[col] = row_ptr[col];
    reference_32x32_dct_1d(line_in, line_out);
    // Final scaling applied by the reference (divide by 4).
    for (int col = 0; col < 32; ++col) row_ptr[col] = line_out[col] / 4;
  }
}
69 
70 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
71 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
72 
73 typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
74     Trans32x32Param;
75 
76 typedef std::tuple<InvTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t, int, int>
77     InvTrans32x32Param;
78 
79 #if CONFIG_VP9_HIGHBITDEPTH
idct32x32_10(const tran_low_t * in,uint8_t * out,int stride)80 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
81   vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
82 }
83 
idct32x32_12(const tran_low_t * in,uint8_t * out,int stride)84 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
85   vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
86 }
87 #endif  // CONFIG_VP9_HIGHBITDEPTH
88 
89 class Trans32x32Test : public AbstractBench,
90                        public ::testing::TestWithParam<Trans32x32Param> {
91  public:
92   ~Trans32x32Test() override = default;
SetUp()93   void SetUp() override {
94     fwd_txfm_ = GET_PARAM(0);
95     inv_txfm_ = GET_PARAM(1);
96     version_ = GET_PARAM(2);  // 0: high precision forward transform
97                               // 1: low precision version for rd loop
98     bit_depth_ = GET_PARAM(3);
99     mask_ = (1 << bit_depth_) - 1;
100   }
101 
TearDown()102   void TearDown() override { libvpx_test::ClearSystemState(); }
103 
104  protected:
105   int version_;
106   vpx_bit_depth_t bit_depth_;
107   int mask_;
108   FwdTxfmFunc fwd_txfm_;
109   InvTxfmFunc inv_txfm_;
110 
111   int16_t *bench_in_;
112   tran_low_t *bench_out_;
113   void Run() override;
114 };
115 
Run()116 void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }
117 
TEST_P(Trans32x32Test,AccuracyCheck)118 TEST_P(Trans32x32Test, AccuracyCheck) {
119   ACMRandom rnd(ACMRandom::DeterministicSeed());
120   uint32_t max_error = 0;
121   int64_t total_error = 0;
122   const int count_test_block = 10000;
123   DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
124   DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
125   DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
126   DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
127 #if CONFIG_VP9_HIGHBITDEPTH
128   DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
129   DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
130 #endif
131 
132   for (int i = 0; i < count_test_block; ++i) {
133     // Initialize a test block with input range [-mask_, mask_].
134     for (int j = 0; j < kNumCoeffs; ++j) {
135       if (bit_depth_ == VPX_BITS_8) {
136         src[j] = rnd.Rand8();
137         dst[j] = rnd.Rand8();
138         test_input_block[j] = src[j] - dst[j];
139 #if CONFIG_VP9_HIGHBITDEPTH
140       } else {
141         src16[j] = rnd.Rand16() & mask_;
142         dst16[j] = rnd.Rand16() & mask_;
143         test_input_block[j] = src16[j] - dst16[j];
144 #endif
145       }
146     }
147 
148     ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
149     if (bit_depth_ == VPX_BITS_8) {
150       ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
151 #if CONFIG_VP9_HIGHBITDEPTH
152     } else {
153       ASM_REGISTER_STATE_CHECK(
154           inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
155 #endif
156     }
157 
158     for (int j = 0; j < kNumCoeffs; ++j) {
159 #if CONFIG_VP9_HIGHBITDEPTH
160       const int32_t diff =
161           bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
162 #else
163       const int32_t diff = dst[j] - src[j];
164 #endif
165       const uint32_t error = diff * diff;
166       if (max_error < error) max_error = error;
167       total_error += error;
168     }
169   }
170 
171   if (version_ == 1) {
172     max_error /= 2;
173     total_error /= 45;
174   }
175 
176   EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
177       << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
178 
179   EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
180       << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
181 }
182 
// Compares the forward transform under test against vpx_fdct32x32_c on random
// input. Version 0 must match exactly; any other version may differ from the
// reference by at most 6 per coefficient.
TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  const int stride = 32;

  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int block = 0; block < count_test_block; ++block) {
    // Difference of two masked random values, i.e. in [-mask_, mask_].
    for (int k = 0; k < kNumCoeffs; ++k) {
      input_block[k] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
    }

    vpx_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    for (int k = 0; k < kNumCoeffs; ++k) {
      if (version_ == 0) {
        EXPECT_EQ(output_block[k], output_ref_block[k])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(output_block[k] - output_ref_block[k]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
    }
  }
}
211 
// Feeds extreme-valued input (every sample is +mask_ or -mask_) through the
// forward transform and vpx_fdct32x32_c, then checks that the two agree and
// that no coefficient magnitude exceeds 4 * DCT_MAX_VALUE scaled for the bit
// depth.
TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 2000;
  const int stride = 32;
  // Largest coefficient magnitude tolerated at this bit depth.
  const int coeff_limit = 4 * DCT_MAX_VALUE << (bit_depth_ - 8);

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int block = 0; block < count_test_block; ++block) {
    // Random mix of the two extremes. This runs for every block so the random
    // sequence advances identically, even when the values are replaced below.
    for (int k = 0; k < kNumCoeffs; ++k) {
      input_extreme_block[k] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    // The first two blocks are the uniform all-max and all-min cases.
    if (block < 2) {
      const int fill = (block == 0) ? mask_ : -mask_;
      for (int k = 0; k < kNumCoeffs; ++k) input_extreme_block[k] = fill;
    }

    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

    // The minimum quant value is 4.
    for (int k = 0; k < kNumCoeffs; ++k) {
      if (version_ == 0) {
        EXPECT_EQ(output_block[k], output_ref_block[k])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(output_block[k] - output_ref_block[k]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
      EXPECT_GE(coeff_limit, abs(output_ref_block[k]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(coeff_limit, abs(output_block[k]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
  }
}
253 
// Benchmarks the forward transform under test (disabled by default). Run()
// applies fwd_txfm_ to bench_in_ / bench_out_, so both are pointed at local
// buffers before timing.
TEST_P(Trans32x32Test, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  // Give the transform well-defined input: every sample is +mask_ or -mask_,
  // chosen from the deterministic random stream. Without this the benchmark
  // would read indeterminate stack contents, and its timings could vary with
  // whatever data happened to be there.
  for (int j = 0; j < kNumCoeffs; ++j) {
    input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
  }

  bench_in_ = input_extreme_block;
  bench_out_ = output_block;

  RunNTimes(INT16_MAX);
  PrintMedian("32x32");
}
266 
// Checks the inverse transform on its own: coefficients come from the
// double-precision reference forward DCT (rounded to integers), are inverse
// transformed on top of dst, and the result must match src to within a squared
// error of 1 per pixel.
TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  const int kStride = 32;
  const bool is_8bit = (bit_depth_ == VPX_BITS_8);
  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

  for (int block = 0; block < count_test_block; ++block) {
    double out_r[kNumCoeffs];

    // Residual is the difference of two random pixel buffers: 8-bit values
    // for VPX_BITS_8, values masked with mask_ otherwise.
    for (int k = 0; k < kNumCoeffs; ++k) {
      if (is_8bit) {
        src[k] = rnd.Rand8();
        dst[k] = rnd.Rand8();
        in[k] = src[k] - dst[k];
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        src16[k] = rnd.Rand16() & mask_;
        dst16[k] = rnd.Rand16() & mask_;
        in[k] = src16[k] - dst16[k];
#endif
      }
    }

    // Reference forward transform, rounded to integer coefficients.
    reference_32x32_dct_2d(in, out_r);
    for (int k = 0; k < kNumCoeffs; ++k) {
      coeff[k] = static_cast<tran_low_t>(round(out_r[k]));
    }

    if (is_8bit) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, kStride));
#if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ASM_REGISTER_STATE_CHECK(
          inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), kStride));
#endif
    }

    for (int k = 0; k < kNumCoeffs; ++k) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff = is_8bit ? dst[k] - src[k] : dst16[k] - src16[k];
#else
      const int diff = dst[k] - src[k];
#endif
      const int error = diff * diff;
      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
                          << " at index " << k;
    }
  }
}
321 
322 class InvTrans32x32Test : public ::testing::TestWithParam<InvTrans32x32Param> {
323  public:
324   ~InvTrans32x32Test() override = default;
SetUp()325   void SetUp() override {
326     ref_txfm_ = GET_PARAM(0);
327     inv_txfm_ = GET_PARAM(1);
328     version_ = GET_PARAM(2);  // 0: high precision forward transform
329                               // 1: low precision version for rd loop
330     bit_depth_ = GET_PARAM(3);
331     eob_ = GET_PARAM(4);
332     thresh_ = GET_PARAM(4);
333     mask_ = (1 << bit_depth_) - 1;
334     pitch_ = 32;
335   }
336 
TearDown()337   void TearDown() override { libvpx_test::ClearSystemState(); }
338 
339  protected:
RunRefTxfm(tran_low_t * out,uint8_t * dst,int stride)340   void RunRefTxfm(tran_low_t *out, uint8_t *dst, int stride) {
341     ref_txfm_(out, dst, stride);
342   }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)343   void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
344     inv_txfm_(out, dst, stride);
345   }
346   int version_;
347   vpx_bit_depth_t bit_depth_;
348   int mask_;
349   int eob_;
350   int thresh_;
351 
352   InvTxfmFunc ref_txfm_;
353   InvTxfmFunc inv_txfm_;
354   int pitch_;
355 
RunInvTrans32x32SpeedTest()356   void RunInvTrans32x32SpeedTest() {
357     ACMRandom rnd(ACMRandom::DeterministicSeed());
358     const int count_test_block = 10000;
359     int64_t c_sum_time = 0;
360     int64_t simd_sum_time = 0;
361     const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
362     DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
363     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
364     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
365 #if CONFIG_VP9_HIGHBITDEPTH
366     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
367     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
368 #endif  // CONFIG_VP9_HIGHBITDEPTH
369 
370     for (int j = 0; j < kNumCoeffs; ++j) {
371       if (j < eob_) {
372         // Random values less than the threshold, either positive or negative
373         coeff[scan[j]] = rnd(thresh_);
374       } else {
375         coeff[scan[j]] = 0;
376       }
377       if (bit_depth_ == VPX_BITS_8) {
378         dst[j] = 0;
379         ref[j] = 0;
380 #if CONFIG_VP9_HIGHBITDEPTH
381       } else {
382         dst16[j] = 0;
383         ref16[j] = 0;
384 #endif  // CONFIG_VP9_HIGHBITDEPTH
385       }
386     }
387 
388     if (bit_depth_ == VPX_BITS_8) {
389       vpx_usec_timer timer_c;
390       vpx_usec_timer_start(&timer_c);
391       for (int i = 0; i < count_test_block; ++i) {
392         RunRefTxfm(coeff, ref, pitch_);
393       }
394       vpx_usec_timer_mark(&timer_c);
395       c_sum_time += vpx_usec_timer_elapsed(&timer_c);
396 
397       vpx_usec_timer timer_mod;
398       vpx_usec_timer_start(&timer_mod);
399       for (int i = 0; i < count_test_block; ++i) {
400         RunInvTxfm(coeff, dst, pitch_);
401       }
402       vpx_usec_timer_mark(&timer_mod);
403       simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
404     } else {
405 #if CONFIG_VP9_HIGHBITDEPTH
406       vpx_usec_timer timer_c;
407       vpx_usec_timer_start(&timer_c);
408       for (int i = 0; i < count_test_block; ++i) {
409         RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
410       }
411       vpx_usec_timer_mark(&timer_c);
412       c_sum_time += vpx_usec_timer_elapsed(&timer_c);
413 
414       vpx_usec_timer timer_mod;
415       vpx_usec_timer_start(&timer_mod);
416       for (int i = 0; i < count_test_block; ++i) {
417         RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
418       }
419       vpx_usec_timer_mark(&timer_mod);
420       simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
421 #endif  // CONFIG_VP9_HIGHBITDEPTH
422     }
423     printf(
424         "c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
425         c_sum_time, simd_sum_time,
426         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
427   }
428 
CompareInvReference32x32()429   void CompareInvReference32x32() {
430     ACMRandom rnd(ACMRandom::DeterministicSeed());
431     const int count_test_block = 10000;
432     const int eob = 31;
433     const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
434     DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
435     DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
436     DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
437 #if CONFIG_VP9_HIGHBITDEPTH
438     DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
439     DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
440 #endif  // CONFIG_VP9_HIGHBITDEPTH
441 
442     for (int i = 0; i < count_test_block; ++i) {
443       for (int j = 0; j < kNumCoeffs; ++j) {
444         if (j < eob) {
445           coeff[scan[j]] = rnd.Rand8Extremes();
446         } else {
447           coeff[scan[j]] = 0;
448         }
449         if (bit_depth_ == VPX_BITS_8) {
450           dst[j] = 0;
451           ref[j] = 0;
452 #if CONFIG_VP9_HIGHBITDEPTH
453         } else {
454           dst16[j] = 0;
455           ref16[j] = 0;
456 #endif  // CONFIG_VP9_HIGHBITDEPTH
457         }
458       }
459       if (bit_depth_ == VPX_BITS_8) {
460         RunRefTxfm(coeff, ref, pitch_);
461         RunInvTxfm(coeff, dst, pitch_);
462       } else {
463 #if CONFIG_VP9_HIGHBITDEPTH
464         RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
465         ASM_REGISTER_STATE_CHECK(
466             RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
467 #endif  // CONFIG_VP9_HIGHBITDEPTH
468       }
469 
470       for (int j = 0; j < kNumCoeffs; ++j) {
471 #if CONFIG_VP9_HIGHBITDEPTH
472         const uint32_t diff =
473             bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
474 #else
475         const uint32_t diff = dst[j] - ref[j];
476 #endif  // CONFIG_VP9_HIGHBITDEPTH
477         const uint32_t error = diff * diff;
478         EXPECT_EQ(0u, error) << "Error: 32x32 IDCT Comparison has error "
479                              << error << " at index " << j;
480       }
481     }
482   }
483 };
484 
485 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans32x32Test);
486 
// Timing comparison of the reference and tested inverse transforms (disabled
// by default).
TEST_P(InvTrans32x32Test, DISABLED_Speed) {
  RunInvTrans32x32SpeedTest();
}
// Bit-exact comparison of the tested inverse transform against the reference.
TEST_P(InvTrans32x32Test, CompareReference) {
  CompareInvReference32x32();
}
489 
490 using std::make_tuple;
491 
492 #if CONFIG_VP9_HIGHBITDEPTH
493 INSTANTIATE_TEST_SUITE_P(
494     C, Trans32x32Test,
495     ::testing::Values(
496         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
497         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
498         make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
499         make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
500         make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
501         make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
502                    VPX_BITS_8)));
503 #else
504 INSTANTIATE_TEST_SUITE_P(
505     C, Trans32x32Test,
506     ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
507                                  VPX_BITS_8),
508                       make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
509                                  1, VPX_BITS_8)));
510 
511 INSTANTIATE_TEST_SUITE_P(
512     C, InvTrans32x32Test,
513     ::testing::Values(
514         (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_c, 0,
515                     VPX_BITS_8, 32, 6225)),
516         make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_c, 0,
517                    VPX_BITS_8, 16, 6255)));
518 #endif  // CONFIG_VP9_HIGHBITDEPTH
519 
// NEON: full precision (version 0) and rd (version 1) forward transforms,
// each paired with the NEON inverse transform at 8 bits.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_neon, &vpx_idct32x32_1024_add_neon, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_neon, &vpx_idct32x32_1024_add_neon, 1,
                   VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
528 
// SSE2, builds without high bit depth: forward/inverse round-trip pairs, plus
// the SSE2 inverse transforms checked against their C counterparts.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_sse2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
545 
// SSE2, high bit depth builds: SSE2 forward transforms at 10, 12 and 8 bits.
// The inverse side is the C implementation in every entry (via the
// idct32x32_10 / idct32x32_12 wrappers for the high bit depths).
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        // 10-bit
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        // 12-bit
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        // 8-bit
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
561 
// AVX2, builds without high bit depth. The round-trip tests pair the AVX2
// forward transforms with the SSE2 inverse transform; the AVX2 inverse
// transforms are checked separately against their C counterparts.
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_avx2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_avx2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
578 
// MSA, builds without high bit depth: full precision and rd forward
// transforms, each paired with the MSA inverse transform.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
587 
// VSX, builds without high bit depth.
// NOTE(review): the version-0 entry pairs the C forward transform with the
// VSX inverse transform - presumably because there is no full precision VSX
// forward transform; confirm.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    VSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_vsx, &vpx_idct32x32_1024_add_vsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
596 
// LSX, builds without high bit depth: full precision and rd forward
// transforms, each paired with the LSX inverse transform.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    LSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_lsx, &vpx_idct32x32_1024_add_lsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_lsx, &vpx_idct32x32_1024_add_lsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
605 }  // namespace
606