• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15 
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17 
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/buffer.h"
22 #include "test/clear_system_state.h"
23 #include "test/register_state_check.h"
24 #include "test/util.h"
25 #include "vp9/common/vp9_entropy.h"
26 #include "vpx/vpx_codec.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/mem.h"
29 
30 using libvpx_test::ACMRandom;
31 using libvpx_test::Buffer;
32 using std::make_tuple;
33 using std::tuple;
34 
35 namespace {
36 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
37 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
38 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
39                         int tx_type);
40 typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
41                            int size, int tx_type);
42 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
43                         int tx_type);
44 typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride,
45                               int tx_type, int bd);
46 
47 template <FdctFunc fn>
fdct_wrapper(const int16_t * in,tran_low_t * out,int stride,int tx_type)48 void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
49   (void)tx_type;
50   fn(in, out, stride);
51 }
52 
53 template <IdctFunc fn>
idct_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)54 void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
55                   int bd) {
56   (void)tx_type;
57   (void)bd;
58   fn(in, out, stride);
59 }
60 
61 template <IhtFunc fn>
iht_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)62 void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
63                  int bd) {
64   (void)bd;
65   fn(in, out, stride, tx_type);
66 }
67 
68 #if CONFIG_VP9_HIGHBITDEPTH
69 typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride,
70                                int bd);
71 
72 typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride,
73                               int tx_type, int bd);
74 
75 template <HighbdIdctFunc fn>
highbd_idct_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)76 void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride,
77                          int tx_type, int bd) {
78   (void)tx_type;
79   fn(in, CAST_TO_SHORTPTR(out), stride, bd);
80 }
81 
82 template <HighbdIhtFunc fn>
highbd_iht_wrapper(const tran_low_t * in,uint8_t * out,int stride,int tx_type,int bd)83 void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride,
84                         int tx_type, int bd) {
85   fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd);
86 }
87 #endif  // CONFIG_VP9_HIGHBITDEPTH
88 
89 struct FuncInfo {
90   FhtFunc ft_func;
91   IhtWithBdFunc it_func;
92   int size;
93   int pixel_size;
94 };
95 
96 /* forward transform, inverse transform, size, transform type, bit depth */
97 typedef tuple<int, const FuncInfo *, int, vpx_bit_depth_t> DctParam;
98 
fdct_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int)99 void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
100               int /*tx_type*/) {
101   const int16_t *i = in.TopLeftPixel();
102   const int i_stride = in.stride();
103   tran_low_t *o = out->TopLeftPixel();
104   if (size == 4) {
105     vpx_fdct4x4_c(i, o, i_stride);
106   } else if (size == 8) {
107     vpx_fdct8x8_c(i, o, i_stride);
108   } else if (size == 16) {
109     vpx_fdct16x16_c(i, o, i_stride);
110   } else if (size == 32) {
111     vpx_fdct32x32_c(i, o, i_stride);
112   }
113 }
114 
fht_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int tx_type)115 void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
116              int tx_type) {
117   const int16_t *i = in.TopLeftPixel();
118   const int i_stride = in.stride();
119   tran_low_t *o = out->TopLeftPixel();
120   if (size == 4) {
121     vp9_fht4x4_c(i, o, i_stride, tx_type);
122   } else if (size == 8) {
123     vp9_fht8x8_c(i, o, i_stride, tx_type);
124   } else if (size == 16) {
125     vp9_fht16x16_c(i, o, i_stride, tx_type);
126   }
127 }
128 
fwht_ref(const Buffer<int16_t> & in,Buffer<tran_low_t> * out,int size,int)129 void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
130               int /*tx_type*/) {
131   ASSERT_EQ(size, 4);
132   vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
133 }
134 
135 class TransTestBase : public ::testing::TestWithParam<DctParam> {
136  public:
SetUp()137   void SetUp() override {
138     rnd_.Reset(ACMRandom::DeterministicSeed());
139     const int idx = GET_PARAM(0);
140     const FuncInfo *func_info = &(GET_PARAM(1)[idx]);
141     tx_type_ = GET_PARAM(2);
142     bit_depth_ = GET_PARAM(3);
143     fwd_txfm_ = func_info->ft_func;
144     inv_txfm_ = func_info->it_func;
145     size_ = func_info->size;
146     pixel_size_ = func_info->pixel_size;
147     max_pixel_value_ = (1 << bit_depth_) - 1;
148 
149     // Randomize stride_ to a value less than or equal to 1024
150     stride_ = rnd_(1024) + 1;
151     if (stride_ < size_) {
152       stride_ = size_;
153     }
154     // Align stride_ to 16 if it's bigger than 16.
155     if (stride_ > 16) {
156       stride_ &= ~15;
157     }
158 
159     block_size_ = size_ * stride_;
160 
161     src_ = reinterpret_cast<uint8_t *>(
162         vpx_memalign(16, pixel_size_ * block_size_));
163     ASSERT_NE(src_, nullptr);
164     dst_ = reinterpret_cast<uint8_t *>(
165         vpx_memalign(16, pixel_size_ * block_size_));
166     ASSERT_NE(dst_, nullptr);
167   }
168 
TearDown()169   void TearDown() override {
170     vpx_free(src_);
171     src_ = nullptr;
172     vpx_free(dst_);
173     dst_ = nullptr;
174     libvpx_test::ClearSystemState();
175   }
176 
InitMem()177   void InitMem() {
178     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
179     if (pixel_size_ == 1) {
180       for (int j = 0; j < block_size_; ++j) {
181         src_[j] = rnd_.Rand16() & max_pixel_value_;
182       }
183       for (int j = 0; j < block_size_; ++j) {
184         dst_[j] = rnd_.Rand16() & max_pixel_value_;
185       }
186     } else {
187       ASSERT_EQ(pixel_size_, 2);
188       uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
189       uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
190       for (int j = 0; j < block_size_; ++j) {
191         src[j] = rnd_.Rand16() & max_pixel_value_;
192       }
193       for (int j = 0; j < block_size_; ++j) {
194         dst[j] = rnd_.Rand16() & max_pixel_value_;
195       }
196     }
197   }
198 
RunFwdTxfm(const Buffer<int16_t> & in,Buffer<tran_low_t> * out)199   void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
200     fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
201   }
202 
RunInvTxfm(const Buffer<tran_low_t> & in,uint8_t * out)203   void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
204     inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_);
205   }
206 
207  protected:
RunAccuracyCheck(int limit)208   void RunAccuracyCheck(int limit) {
209     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
210     ACMRandom rnd(ACMRandom::DeterministicSeed());
211     Buffer<int16_t> test_input_block =
212         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
213     ASSERT_TRUE(test_input_block.Init());
214     ASSERT_NE(test_input_block.TopLeftPixel(), nullptr);
215     Buffer<tran_low_t> test_temp_block =
216         Buffer<tran_low_t>(size_, size_, 0, 16);
217     ASSERT_TRUE(test_temp_block.Init());
218     uint32_t max_error = 0;
219     int64_t total_error = 0;
220     const int count_test_block = 10000;
221     for (int i = 0; i < count_test_block; ++i) {
222       InitMem();
223       for (int h = 0; h < size_; ++h) {
224         for (int w = 0; w < size_; ++w) {
225           if (pixel_size_ == 1) {
226             test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
227                 src_[h * stride_ + w] - dst_[h * stride_ + w];
228           } else {
229             ASSERT_EQ(pixel_size_, 2);
230             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
231             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
232             test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
233                 src[h * stride_ + w] - dst[h * stride_ + w];
234           }
235         }
236       }
237 
238       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
239       ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_));
240 
241       for (int h = 0; h < size_; ++h) {
242         for (int w = 0; w < size_; ++w) {
243           int diff;
244           if (pixel_size_ == 1) {
245             diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
246           } else {
247             ASSERT_EQ(pixel_size_, 2);
248             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
249             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
250             diff = dst[h * stride_ + w] - src[h * stride_ + w];
251           }
252           const uint32_t error = diff * diff;
253           if (max_error < error) max_error = error;
254           total_error += error;
255         }
256       }
257     }
258 
259     EXPECT_GE(static_cast<uint32_t>(limit), max_error)
260         << "Error: " << size_ << "x" << size_
261         << " transform/inverse transform has an individual round trip error > "
262         << limit;
263 
264     EXPECT_GE(count_test_block * limit, total_error)
265         << "Error: " << size_ << "x" << size_
266         << " transform/inverse transform has average round trip error > "
267         << limit << " per block";
268   }
269 
RunCoeffCheck()270   void RunCoeffCheck() {
271     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
272     ACMRandom rnd(ACMRandom::DeterministicSeed());
273     const int count_test_block = 5000;
274     Buffer<int16_t> input_block =
275         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
276     ASSERT_TRUE(input_block.Init());
277     Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
278     ASSERT_TRUE(output_ref_block.Init());
279     Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
280     ASSERT_TRUE(output_block.Init());
281 
282     for (int i = 0; i < count_test_block; ++i) {
283       // Initialize a test block with input range [-max_pixel_value_,
284       // max_pixel_value_].
285       input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
286 
287       fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
288       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
289 
290       // The minimum quant value is 4.
291       EXPECT_TRUE(output_block.CheckValues(output_ref_block));
292       if (::testing::Test::HasFailure()) {
293         printf("Size: %d Transform type: %d\n", size_, tx_type_);
294         output_block.PrintDifference(output_ref_block);
295         return;
296       }
297     }
298   }
299 
RunMemCheck()300   void RunMemCheck() {
301     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
302     ACMRandom rnd(ACMRandom::DeterministicSeed());
303     const int count_test_block = 5000;
304     Buffer<int16_t> input_extreme_block =
305         Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
306     ASSERT_TRUE(input_extreme_block.Init());
307     Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
308     ASSERT_TRUE(output_ref_block.Init());
309     Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
310     ASSERT_TRUE(output_block.Init());
311 
312     for (int i = 0; i < count_test_block; ++i) {
313       // Initialize a test block with -max_pixel_value_ or max_pixel_value_.
314       if (i == 0) {
315         input_extreme_block.Set(max_pixel_value_);
316       } else if (i == 1) {
317         input_extreme_block.Set(-max_pixel_value_);
318       } else {
319         ASSERT_NE(input_extreme_block.TopLeftPixel(), nullptr);
320         for (int h = 0; h < size_; ++h) {
321           for (int w = 0; w < size_; ++w) {
322             input_extreme_block
323                 .TopLeftPixel()[h * input_extreme_block.stride() + w] =
324                 rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_;
325           }
326         }
327       }
328 
329       fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
330       ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
331 
332       // The minimum quant value is 4.
333       EXPECT_TRUE(output_block.CheckValues(output_ref_block));
334       ASSERT_NE(output_block.TopLeftPixel(), nullptr);
335       for (int h = 0; h < size_; ++h) {
336         for (int w = 0; w < size_; ++w) {
337           EXPECT_GE(
338               4 * DCT_MAX_VALUE << (bit_depth_ - 8),
339               abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
340               << "Error: " << size_ << "x" << size_
341               << " transform has coefficient larger than 4*DCT_MAX_VALUE"
342               << " at " << w << "," << h;
343           if (::testing::Test::HasFailure()) {
344             printf("Size: %d Transform type: %d\n", size_, tx_type_);
345             output_block.DumpBuffer();
346             return;
347           }
348         }
349       }
350     }
351   }
352 
RunInvAccuracyCheck(int limit)353   void RunInvAccuracyCheck(int limit) {
354     if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
355     ACMRandom rnd(ACMRandom::DeterministicSeed());
356     const int count_test_block = 1000;
357     Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
358     ASSERT_TRUE(in.Init());
359     Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
360     ASSERT_TRUE(coeff.Init());
361 
362     for (int i = 0; i < count_test_block; ++i) {
363       InitMem();
364       ASSERT_NE(in.TopLeftPixel(), nullptr);
365       // Initialize a test block with input range [-max_pixel_value_,
366       // max_pixel_value_].
367       for (int h = 0; h < size_; ++h) {
368         for (int w = 0; w < size_; ++w) {
369           if (pixel_size_ == 1) {
370             in.TopLeftPixel()[h * in.stride() + w] =
371                 src_[h * stride_ + w] - dst_[h * stride_ + w];
372           } else {
373             ASSERT_EQ(pixel_size_, 2);
374             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
375             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
376             in.TopLeftPixel()[h * in.stride() + w] =
377                 src[h * stride_ + w] - dst[h * stride_ + w];
378           }
379         }
380       }
381 
382       fwd_txfm_ref(in, &coeff, size_, tx_type_);
383 
384       ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_));
385 
386       for (int h = 0; h < size_; ++h) {
387         for (int w = 0; w < size_; ++w) {
388           int diff;
389           if (pixel_size_ == 1) {
390             diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
391           } else {
392             ASSERT_EQ(pixel_size_, 2);
393             const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
394             const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
395             diff = dst[h * stride_ + w] - src[h * stride_ + w];
396           }
397           const uint32_t error = diff * diff;
398           EXPECT_GE(static_cast<uint32_t>(limit), error)
399               << "Error: " << size_ << "x" << size_
400               << " inverse transform has error " << error << " at " << w << ","
401               << h;
402           if (::testing::Test::HasFailure()) {
403             printf("Size: %d Transform type: %d\n", size_, tx_type_);
404             return;
405           }
406         }
407       }
408     }
409   }
410 
411   FhtFunc fwd_txfm_;
412   FhtFuncRef fwd_txfm_ref;
413   IhtWithBdFunc inv_txfm_;
414   ACMRandom rnd_;
415   uint8_t *src_;
416   uint8_t *dst_;
417   vpx_bit_depth_t bit_depth_;
418   int tx_type_;
419   int max_pixel_value_;
420   int size_;
421   int stride_;
422   int pixel_size_;
423   int block_size_;
424 };
425 
426 /* -------------------------------------------------------------------------- */
427 
428 class TransDCT : public TransTestBase {
429  public:
TransDCT()430   TransDCT() { fwd_txfm_ref = fdct_ref; }
431 };
432 
// Round trip accuracy for the DCT. The per-pixel squared error limit is 1,
// relaxed to 2 (16x16) or 7 (32x32) for 2-byte pixels at bit depths above 10.
TEST_P(TransDCT, AccuracyCheck) {
  const bool relaxed = bit_depth_ > 10 && pixel_size_ == 2;
  int limit = 1;
  if (relaxed) {
    switch (size_) {
      case 16: limit = 2; break;
      case 32: limit = 7; break;
      default: break;
    }
  }
  RunAccuracyCheck(limit);
}
442 
// Forward DCT output must match the reference on random input.
TEST_P(TransDCT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward DCT output must match the reference and stay in range on extreme
// input.
TEST_P(TransDCT, MemCheck) {
  RunMemCheck();
}

// Inverse DCT must reconstruct within a squared error of 1 per pixel.
TEST_P(TransDCT, InvAccuracyCheck) {
  RunInvAccuracyCheck(1);
}
448 
449 static const FuncInfo dct_c_func_info[] = {
450 #if CONFIG_VP9_HIGHBITDEPTH
451   { &fdct_wrapper<vpx_highbd_fdct4x4_c>,
452     &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_c>, 4, 2 },
453   { &fdct_wrapper<vpx_highbd_fdct8x8_c>,
454     &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_c>, 8, 2 },
455   { &fdct_wrapper<vpx_highbd_fdct16x16_c>,
456     &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_c>, 16, 2 },
457   { &fdct_wrapper<vpx_highbd_fdct32x32_c>,
458     &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_c>, 32, 2 },
459 #endif
460   { &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
461   { &fdct_wrapper<vpx_fdct8x8_c>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
462   { &fdct_wrapper<vpx_fdct16x16_c>, &idct_wrapper<vpx_idct16x16_256_add_c>, 16,
463     1 },
464   { &fdct_wrapper<vpx_fdct32x32_c>, &idct_wrapper<vpx_idct32x32_1024_add_c>, 32,
465     1 }
466 };
467 
468 INSTANTIATE_TEST_SUITE_P(
469     C, TransDCT,
470     ::testing::Combine(
471         ::testing::Range(0, static_cast<int>(sizeof(dct_c_func_info) /
472                                              sizeof(dct_c_func_info[0]))),
473         ::testing::Values(dct_c_func_info), ::testing::Values(0),
474         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
475 
476 #if !CONFIG_EMULATE_HARDWARE
477 
#if HAVE_SSE2
// SSE2 implementations of the DCT pairs. The high bit depth rows (2 bytes per
// pixel) are present only when CONFIG_VP9_HIGHBITDEPTH is set.
static const FuncInfo dct_sse2_func_info[] = {
#if CONFIG_VP9_HIGHBITDEPTH
  { &fdct_wrapper<vpx_highbd_fdct4x4_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_sse2>, 4, 2 },
  { &fdct_wrapper<vpx_highbd_fdct8x8_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_sse2>, 8, 2 },
  { &fdct_wrapper<vpx_highbd_fdct16x16_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_sse2>, 16, 2 },
  { &fdct_wrapper<vpx_highbd_fdct32x32_sse2>,
    &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_sse2>, 32, 2 },
#endif
  { &fdct_wrapper<vpx_fdct4x4_sse2>, &idct_wrapper<vpx_idct4x4_16_add_sse2>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_sse2>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_sse2>,
    &idct_wrapper<vpx_idct16x16_256_add_sse2>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_sse2>,
    &idct_wrapper<vpx_idct32x32_1024_add_sse2>, 32, 1 }
};

// Number of rows in dct_sse2_func_info; bounds the index range below.
static const int kDctSse2FuncCount = static_cast<int>(
    sizeof(dct_sse2_func_info) / sizeof(dct_sse2_func_info[0]));

// Every table row, transform type 0, at each bit depth.
INSTANTIATE_TEST_SUITE_P(
    SSE2, TransDCT,
    ::testing::Combine(::testing::Range(0, kDctSse2FuncCount),
                       ::testing::Values(dct_sse2_func_info),
                       ::testing::Values(0),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
#endif  // HAVE_SSE2
508 
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
// vpx_fdct8x8_ssse3 is only available in 64 bit builds. The SSSE3 forward
// transform is paired with the SSE2 inverse.
static const FuncInfo dct_ssse3_func_info = {
  &fdct_wrapper<vpx_fdct8x8_ssse3>,         // forward
  &idct_wrapper<vpx_idct8x8_64_add_sse2>,   // inverse
  8,                                        // size
  1                                         // bytes per pixel
};

// TODO(johannkoenig): high bit depth fdct8x8.
// Single entry: index 0, transform type 0, 8-bit only.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, TransDCT,
    ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
520 
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
// The AVX2 32x32 forward transform is paired with the SSE2 inverse.
static const FuncInfo dct_avx2_func_info = {
  &fdct_wrapper<vpx_fdct32x32_avx2>,            // forward
  &idct_wrapper<vpx_idct32x32_1024_add_sse2>,   // inverse
  32,                                           // size
  1                                             // bytes per pixel
};

// TODO(johannkoenig): high bit depth fdct32x32.
// Single entry: index 0, transform type 0, 8-bit only.
INSTANTIATE_TEST_SUITE_P(
    AVX2, TransDCT,
    ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
532 
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
// NEON high bit depth DCT pairs (2 bytes per pixel). The 32x32 row is
// disabled.
static const FuncInfo dct_neon_func_info[] = {
  { &fdct_wrapper<vpx_highbd_fdct4x4_neon>,
    &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_neon>, 4, 2 },
  { &fdct_wrapper<vpx_highbd_fdct8x8_neon>,
    &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_neon>, 8, 2 },
  { &fdct_wrapper<vpx_highbd_fdct16x16_neon>,
    &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_neon>, 16, 2 },
  /* { &fdct_wrapper<vpx_highbd_fdct32x32_neon>,
       &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_neon>, 32, 2 },*/
};
#else
// NEON 8-bit DCT pairs (1 byte per pixel).
static const FuncInfo dct_neon_func_info[] = {
  { &fdct_wrapper<vpx_fdct4x4_neon>, &idct_wrapper<vpx_idct4x4_16_add_neon>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_neon>, &idct_wrapper<vpx_idct8x8_64_add_neon>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_neon>,
    &idct_wrapper<vpx_idct16x16_256_add_neon>, 16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_neon>,
    &idct_wrapper<vpx_idct32x32_1024_add_neon>, 32, 1 }
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Number of rows in whichever dct_neon_func_info table was compiled in.
static const int kDctNeonFuncCount = static_cast<int>(
    sizeof(dct_neon_func_info) / sizeof(dct_neon_func_info[0]));

// Every table row, transform type 0, at each bit depth.
INSTANTIATE_TEST_SUITE_P(
    NEON, TransDCT,
    ::testing::Combine(::testing::Range(0, kDctNeonFuncCount),
                       ::testing::Values(dct_neon_func_info),
                       ::testing::Values(0),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
#endif  // HAVE_NEON
566 
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
// MSA 8-bit DCT pairs (1 byte per pixel).
static const FuncInfo dct_msa_func_info[] = {
  { &fdct_wrapper<vpx_fdct4x4_msa>, &idct_wrapper<vpx_idct4x4_16_add_msa>, 4,
    1 },
  { &fdct_wrapper<vpx_fdct8x8_msa>, &idct_wrapper<vpx_idct8x8_64_add_msa>, 8,
    1 },
  { &fdct_wrapper<vpx_fdct16x16_msa>, &idct_wrapper<vpx_idct16x16_256_add_msa>,
    16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_msa>, &idct_wrapper<vpx_idct32x32_1024_add_msa>,
    32, 1 }
};

// The index range is derived from the table so the two cannot drift apart
// when rows are added or removed.
INSTANTIATE_TEST_SUITE_P(
    MSA, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_msa_func_info) /
                                             sizeof(dct_msa_func_info[0]))),
        ::testing::Values(dct_msa_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
585 
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
// The C 4x4 forward transform is paired with the VSX inverse.
static const FuncInfo dct_vsx_func_info = {
  &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_vsx>, 4, 1
};

// Single entry: index 0, transform type 0, 8-bit only.
INSTANTIATE_TEST_SUITE_P(VSX, TransDCT,
                         ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0,
                                                      VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
595 
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
// LSX forward transforms. The 4x4, 8x8 and 16x16 rows use the C inverse; the
// 32x32 row uses the LSX inverse.
static const FuncInfo dct_lsx_func_info[] = {
  { &fdct_wrapper<vpx_fdct4x4_lsx>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
  { &fdct_wrapper<vpx_fdct8x8_lsx>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
  { &fdct_wrapper<vpx_fdct16x16_lsx>, &idct_wrapper<vpx_idct16x16_256_add_c>,
    16, 1 },
  { &fdct_wrapper<vpx_fdct32x32_lsx>, &idct_wrapper<vpx_idct32x32_1024_add_lsx>,
    32, 1 }
};

// The index range is derived from the table so the two cannot drift apart
// when rows are added or removed.
INSTANTIATE_TEST_SUITE_P(
    LSX, TransDCT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(dct_lsx_func_info) /
                                             sizeof(dct_lsx_func_info[0]))),
        ::testing::Values(dct_lsx_func_info), ::testing::Values(0),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH
612 
613 #endif  // !CONFIG_EMULATE_HARDWARE
614 
615 /* -------------------------------------------------------------------------- */
616 
617 class TransHT : public TransTestBase {
618  public:
TransHT()619   TransHT() { fwd_txfm_ref = fht_ref; }
620 };
621 
// Round trip accuracy for the hybrid transform. The per-pixel squared error
// limit is 1, relaxed to 2 for 16x16 with 2-byte pixels at bit depths above
// 10.
TEST_P(TransHT, AccuracyCheck) {
  int limit = 1;
  if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) {
    limit = 2;
  }
  RunAccuracyCheck(limit);
}
625 
// Forward hybrid transform output must match the reference on random input.
TEST_P(TransHT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward hybrid transform output must match the reference and stay in range
// on extreme input.
TEST_P(TransHT, MemCheck) {
  RunMemCheck();
}

// Inverse hybrid transform must reconstruct within a squared error of 1 per
// pixel.
TEST_P(TransHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(1);
}
631 
632 static const FuncInfo ht_c_func_info[] = {
633 #if CONFIG_VP9_HIGHBITDEPTH
634   { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_c>, 4,
635     2 },
636   { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_c>, 8,
637     2 },
638   { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_c>,
639     16, 2 },
640 #endif
641   { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_c>, 4, 1 },
642   { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_c>, 8, 1 },
643   { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_c>, 16, 1 }
644 };
645 
646 INSTANTIATE_TEST_SUITE_P(
647     C, TransHT,
648     ::testing::Combine(
649         ::testing::Range(0, static_cast<int>(sizeof(ht_c_func_info) /
650                                              sizeof(ht_c_func_info[0]))),
651         ::testing::Values(ht_c_func_info), ::testing::Range(0, 4),
652         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
653 
654 #if !CONFIG_EMULATE_HARDWARE
655 
#if HAVE_NEON

// NEON hybrid transform pairs. Each size is listed twice: once with the C
// forward transform and once with the NEON forward transform, both paired
// with the NEON inverse. The high bit depth rows (2 bytes per pixel) are
// present only when CONFIG_VP9_HIGHBITDEPTH is set.
static const FuncInfo ht_neon_func_info[] = {
#if CONFIG_VP9_HIGHBITDEPTH
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_neon>, 4,
    2 },
  { &vp9_highbd_fht4x4_neon, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_neon>,
    4, 2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_neon>, 8,
    2 },
  { &vp9_highbd_fht8x8_neon, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_neon>,
    8, 2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_neon>, 16, 2 },
  { &vp9_highbd_fht16x16_neon,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_neon>, 16, 2 },
#endif
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
  { &vp9_fht4x4_neon, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
  { &vp9_fht8x8_neon, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 },
  { &vp9_fht16x16_neon, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
};

// Number of rows in ht_neon_func_info; bounds the index range below.
static const int kHtNeonFuncCount =
    static_cast<int>(sizeof(ht_neon_func_info) / sizeof(ht_neon_func_info[0]));

// Every table row, transform types 0 through 3, at each bit depth.
INSTANTIATE_TEST_SUITE_P(
    NEON, TransHT,
    ::testing::Combine(::testing::Range(0, kHtNeonFuncCount),
                       ::testing::Values(ht_neon_func_info),
                       ::testing::Range(0, 4),
                       ::testing::Values(VPX_BITS_8, VPX_BITS_10,
                                         VPX_BITS_12)));
#endif  // HAVE_NEON
689 
#if HAVE_SSE2

// SSE2 8-bit hybrid transform pairs (1 byte per pixel).
static const FuncInfo ht_sse2_func_info[] = {
  { &vp9_fht4x4_sse2, &iht_wrapper<vp9_iht4x4_16_add_sse2>, 4, 1 },
  { &vp9_fht8x8_sse2, &iht_wrapper<vp9_iht8x8_64_add_sse2>, 8, 1 },
  { &vp9_fht16x16_sse2, &iht_wrapper<vp9_iht16x16_256_add_sse2>, 16, 1 }
};

// The index range is derived from the table so the two cannot drift apart
// when rows are added or removed.
INSTANTIATE_TEST_SUITE_P(
    SSE2, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_sse2_func_info) /
                                             sizeof(ht_sse2_func_info[0]))),
        ::testing::Values(ht_sse2_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_SSE2
704 
#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
// High bit depth hybrid transform pairs: the C forward transform is paired
// with the SSE4.1 inverse (2 bytes per pixel).
static const FuncInfo ht_sse4_1_func_info[] = {
  { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_sse4_1>,
    4, 2 },
  { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_sse4_1>,
    8, 2 },
  { &vp9_highbd_fht16x16_c,
    &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_sse4_1>, 16, 2 }
};

// The index range is derived from the table so the two cannot drift apart
// when rows are added or removed.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_sse4_1_func_info) /
                                             sizeof(ht_sse4_1_func_info[0]))),
        ::testing::Values(ht_sse4_1_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
723 
#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
// 8-bit hybrid transform pairs: the C forward transform is paired with the
// VSX inverse (1 byte per pixel).
static const FuncInfo ht_vsx_func_info[] = {
  { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_vsx>, 4, 1 },
  { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_vsx>, 8, 1 },
  { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_vsx>, 16, 1 }
};

// The index range is derived from the table so the two cannot drift apart
// when rows are added or removed.
INSTANTIATE_TEST_SUITE_P(
    VSX, TransHT,
    ::testing::Combine(
        ::testing::Range(0, static_cast<int>(sizeof(ht_vsx_func_info) /
                                             sizeof(ht_vsx_func_info[0]))),
        ::testing::Values(ht_vsx_func_info), ::testing::Range(0, 4),
        ::testing::Values(VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
737 #endif  // !CONFIG_EMULATE_HARDWARE
738 
739 /* -------------------------------------------------------------------------- */
740 
741 class TransWHT : public TransTestBase {
742  public:
TransWHT()743   TransWHT() { fwd_txfm_ref = fwht_ref; }
744 };
745 
// The WHT round trip is checked with an error limit of 0.
TEST_P(TransWHT, AccuracyCheck) {
  RunAccuracyCheck(0);
}

// Forward WHT output must match the reference on random input.
TEST_P(TransWHT, CoeffCheck) {
  RunCoeffCheck();
}

// Forward WHT output must match the reference and stay in range on extreme
// input.
TEST_P(TransWHT, MemCheck) {
  RunMemCheck();
}

// The inverse WHT is checked with an error limit of 0.
TEST_P(TransWHT, InvAccuracyCheck) {
  RunInvAccuracyCheck(0);
}
753 
754 static const FuncInfo wht_c_func_info[] = {
755 #if CONFIG_VP9_HIGHBITDEPTH
756   { &fdct_wrapper<vp9_highbd_fwht4x4_c>,
757     &highbd_idct_wrapper<vpx_highbd_iwht4x4_16_add_c>, 4, 2 },
758 #endif
759   { &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_c>, 4, 1 }
760 };
761 
762 INSTANTIATE_TEST_SUITE_P(
763     C, TransWHT,
764     ::testing::Combine(
765         ::testing::Range(0, static_cast<int>(sizeof(wht_c_func_info) /
766                                              sizeof(wht_c_func_info[0]))),
767         ::testing::Values(wht_c_func_info), ::testing::Values(0),
768         ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
769 
#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
// SSE2 4x4 Walsh-Hadamard pair (1 byte per pixel).
static const FuncInfo wht_sse2_func_info = {
  &fdct_wrapper<vp9_fwht4x4_sse2>,         // forward
  &idct_wrapper<vpx_iwht4x4_16_add_sse2>,  // inverse
  4,                                       // size
  1                                        // bytes per pixel
};

// Single entry: index 0, transform type 0, 8-bit only.
INSTANTIATE_TEST_SUITE_P(
    SSE2, TransWHT,
    ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
779 
#if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
// The C 4x4 forward Walsh-Hadamard transform is paired with the VSX inverse.
static const FuncInfo wht_vsx_func_info = {
  &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_vsx>, 4, 1
};

// Single entry: index 0, transform type 0, 8-bit only.
INSTANTIATE_TEST_SUITE_P(VSX, TransWHT,
                         ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0,
                                                      VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
789 
790 }  // namespace
791