1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17
18 #include "./vp9_rtcd.h"
19 #include "./vpx_config.h"
20 #include "./vpx_dsp_rtcd.h"
21 #include "test/acm_random.h"
22 #include "test/bench.h"
23 #include "test/clear_system_state.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
26 #include "vp9/common/vp9_entropy.h"
27 #include "vp9/common/vp9_scan.h"
28 #include "vpx/vpx_codec.h"
29 #include "vpx/vpx_integer.h"
30 #include "vpx_ports/mem.h"
31 #include "vpx_ports/msvc.h" // for round()
32 #include "vpx_ports/vpx_timer.h"
33
34 using libvpx_test::ACMRandom;
35
36 namespace {
37
// Number of coefficients in one 32x32 block.
const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;

// Floating-point reference for a 32-point 1-D DCT.
//
// in:  32 input samples.
// out: 32 outputs; out[k] is the sum over n of
//      in[n] * cos(kPi * (2n + 1) * k / 64), and the k == 0 term is
//      additionally multiplied by 1/sqrt(2).
void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int freq = 0; freq < 32; freq++) {
    // Accumulate directly in the output slot, sample by sample.
    double &acc = out[freq];
    acc = 0.0;
    for (int sample = 0; sample < 32; sample++) {
      acc += in[sample] * cos(kPi * (2 * sample + 1) * freq / 64.0);
    }
    // Only the DC term gets the extra 1/sqrt(2) factor.
    if (freq == 0) acc *= kInvSqrt2;
  }
}
50
// Floating-point reference for the 2-D 32x32 DCT.
//
// input:  32x32 block of int16_t samples, row-major with a stride of 32.
// output: 32x32 block of doubles, same layout. The 1-D reference is applied
//         to every column and then to every row; the row results are divided
//         by 4 before being stored.
void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
                            double output[kNumCoeffs]) {
  double line_in[32];
  double line_out[32];

  // Pass 1: transform each column of the input into the output buffer.
  for (int col = 0; col < 32; ++col) {
    for (int row = 0; row < 32; ++row) line_in[row] = input[row * 32 + col];
    reference_32x32_dct_1d(line_in, line_out);
    for (int row = 0; row < 32; ++row) output[row * 32 + col] = line_out[row];
  }

  // Pass 2: transform each row of the intermediate result in place.
  for (int row = 0; row < 32; ++row) {
    double *const row_ptr = output + row * 32;
    for (int col = 0; col < 32; ++col) line_in[col] = row_ptr[col];
    reference_32x32_dct_1d(line_in, line_out);
    // Fixed scale factor applied to the final coefficients.
    for (int col = 0; col < 32; ++col) row_ptr[col] = line_out[col] / 4;
  }
}
69
70 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
71 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
72
73 typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
74 Trans32x32Param;
75
76 typedef std::tuple<InvTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t, int, int>
77 InvTrans32x32Param;
78
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters that give the C high-bitdepth 32x32 inverse transform the
// three-argument InvTxfmFunc shape by fixing its bit-depth argument.

// 10-bit variant.
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  const int kBitDepth = 10;
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride,
                                  kBitDepth);
}

// 12-bit variant.
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  const int kBitDepth = 12;
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride,
                                  kBitDepth);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
88
89 class Trans32x32Test : public AbstractBench,
90 public ::testing::TestWithParam<Trans32x32Param> {
91 public:
92 ~Trans32x32Test() override = default;
SetUp()93 void SetUp() override {
94 fwd_txfm_ = GET_PARAM(0);
95 inv_txfm_ = GET_PARAM(1);
96 version_ = GET_PARAM(2); // 0: high precision forward transform
97 // 1: low precision version for rd loop
98 bit_depth_ = GET_PARAM(3);
99 mask_ = (1 << bit_depth_) - 1;
100 }
101
TearDown()102 void TearDown() override { libvpx_test::ClearSystemState(); }
103
104 protected:
105 int version_;
106 vpx_bit_depth_t bit_depth_;
107 int mask_;
108 FwdTxfmFunc fwd_txfm_;
109 InvTxfmFunc inv_txfm_;
110
111 int16_t *bench_in_;
112 tran_low_t *bench_out_;
113 void Run() override;
114 };
115
Run()116 void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }
117
// Round-trip check: for random src/dst blocks, forward-transform the residual
// (src - dst), add the inverse transform back onto dst, and compare with src.
// Both the largest squared error and the accumulated squared error over all
// blocks are bounded; the bounds scale with bit depth, and the measured errors
// are reduced first when testing the low precision (version_ == 1) transform.
TEST_P(Trans32x32Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
  const bool is_8bit = bit_depth_ == VPX_BITS_8;
  uint32_t max_error = 0;
  int64_t total_error = 0;
  DECLARE_ALIGNED(16, int16_t, residual[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, coeffs[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

  for (int block = 0; block < count_test_block; ++block) {
    // Build a residual in [-mask_, mask_] from two random sample blocks.
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (is_8bit) {
        src[j] = rnd.Rand8();
        dst[j] = rnd.Rand8();
        residual[j] = src[j] - dst[j];
        continue;
      }
#if CONFIG_VP9_HIGHBITDEPTH
      src16[j] = rnd.Rand16() & mask_;
      dst16[j] = rnd.Rand16() & mask_;
      residual[j] = src16[j] - dst16[j];
#endif
    }

    // Forward transform, then reconstruct on top of the dst samples.
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(residual, coeffs, 32));
    if (is_8bit) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeffs, dst, 32));
    } else {
#if CONFIG_VP9_HIGHBITDEPTH
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeffs, CAST_TO_BYTEPTR(dst16), 32));
#endif
    }

    // Track the worst and the accumulated squared reconstruction error.
    for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int32_t diff = is_8bit ? dst[j] - src[j] : dst16[j] - src16[j];
#else
      const int32_t diff = dst[j] - src[j];
#endif
      const uint32_t error = diff * diff;
      if (error > max_error) max_error = error;
      total_error += error;
    }
  }

  // The low precision transform is allowed proportionally larger errors.
  if (version_ == 1) {
    max_error /= 2;
    total_error /= 45;
  }

  // Error bounds grow by a factor of 4 for every extra bit of depth.
  const int shift = 2 * (bit_depth_ - 8);

  EXPECT_GE(1u << shift, max_error)
      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";

  EXPECT_GE(count_test_block << shift, total_error)
      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
}
182
// Compares the forward transform under test against vpx_fdct32x32_c on random
// residual blocks. The high precision transform (version_ == 0) must match
// exactly; the low precision one may differ by at most 6 per coefficient.
TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  const int stride = 32;

  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
    for (int j = 0; j < kNumCoeffs; ++j) {
      // Draw the two samples in separate statements: as operands of a single
      // subtraction their evaluation order is unspecified, which would make
      // the generated input depend on the compiler despite the fixed seed.
      const int minuend = rnd.Rand16() & mask_;
      const int subtrahend = rnd.Rand16() & mask_;
      input_block[j] = minuend - subtrahend;
    }

    vpx_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    if (version_ == 0) {
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_EQ(output_block[j], output_ref_block[j])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
    } else {
      for (int j = 0; j < kNumCoeffs; ++j)
        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
    }
  }
}
211
// Feeds extreme-valued blocks (every sample is +mask_ or -mask_) through the
// forward transform and checks that it agrees with vpx_fdct32x32_c and that
// no coefficient magnitude exceeds 4 * DCT_MAX_VALUE scaled for the bit depth.
TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 2000;
  const int stride = 32;

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int block = 0; block < count_test_block; ++block) {
    // Random mix of the two extremes. The random numbers are drawn for every
    // block, including the first two whose contents are overwritten below.
    for (int j = 0; j < kNumCoeffs; ++j) {
      const bool use_positive = (rnd.Rand8() & 1) != 0;
      input_extreme_block[j] = use_positive ? mask_ : -mask_;
    }
    // Blocks 0 and 1 are the all-positive and all-negative corner cases.
    if (block < 2) {
      const int fill = (block == 0) ? mask_ : -mask_;
      for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = fill;
    }

    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

    // The minimum quant value is 4.
    for (int j = 0; j < kNumCoeffs; ++j) {
      const int delta = abs(output_block[j] - output_ref_block[j]);
      if (version_ != 0) {
        EXPECT_GE(6, delta)
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      } else {
        EXPECT_EQ(output_block[j], output_ref_block[j])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      }
      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
  }
}
253
// Benchmark for the forward transform (disabled by default). Times repeated
// calls of Run() on a single input block and prints the median.
TEST_P(Trans32x32Test, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  // Fill the input before timing. Without this the transform would read
  // indeterminate stack contents, making the measurement non-reproducible.
  // The fill uses the same +mask_ / -mask_ pattern as MemCheck.
  for (int j = 0; j < kNumCoeffs; ++j) {
    input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
  }

  bench_in_ = input_extreme_block;
  bench_out_ = output_block;

  RunNTimes(INT16_MAX);
  PrintMedian("32x32");
}
266
// Checks the inverse transform on its own: coefficients come from the
// floating-point reference DCT (rounded to integers), are inverse-transformed
// onto dst, and every reconstructed sample must be within 1 of src.
TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  const bool is_8bit = bit_depth_ == VPX_BITS_8;
  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif
  // Reference coefficients; fully rewritten for every block.
  double out_r[kNumCoeffs];

  for (int block = 0; block < count_test_block; ++block) {
    // Build a residual in [-mask_, mask_] (i.e. [-255, 255] at 8 bits).
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (is_8bit) {
        src[j] = rnd.Rand8();
        dst[j] = rnd.Rand8();
        in[j] = src[j] - dst[j];
        continue;
      }
#if CONFIG_VP9_HIGHBITDEPTH
      src16[j] = rnd.Rand16() & mask_;
      dst16[j] = rnd.Rand16() & mask_;
      in[j] = src16[j] - dst16[j];
#endif
    }

    // Reference forward transform, rounded to integer coefficients.
    reference_32x32_dct_2d(in, out_r);
    for (int j = 0; j < kNumCoeffs; ++j) {
      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
    }

    if (is_8bit) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
    } else {
#if CONFIG_VP9_HIGHBITDEPTH
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
#endif
    }

    // Each reconstructed sample may be off by at most one.
    for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff = is_8bit ? dst[j] - src[j] : dst16[j] - src16[j];
#else
      const int diff = dst[j] - src[j];
#endif
      const int error = diff * diff;
      EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
                          << " at index " << j;
    }
  }
}
321
322 class InvTrans32x32Test : public ::testing::TestWithParam<InvTrans32x32Param> {
323 public:
324 ~InvTrans32x32Test() override = default;
SetUp()325 void SetUp() override {
326 ref_txfm_ = GET_PARAM(0);
327 inv_txfm_ = GET_PARAM(1);
328 version_ = GET_PARAM(2); // 0: high precision forward transform
329 // 1: low precision version for rd loop
330 bit_depth_ = GET_PARAM(3);
331 eob_ = GET_PARAM(4);
332 thresh_ = GET_PARAM(4);
333 mask_ = (1 << bit_depth_) - 1;
334 pitch_ = 32;
335 }
336
TearDown()337 void TearDown() override { libvpx_test::ClearSystemState(); }
338
339 protected:
RunRefTxfm(tran_low_t * out,uint8_t * dst,int stride)340 void RunRefTxfm(tran_low_t *out, uint8_t *dst, int stride) {
341 ref_txfm_(out, dst, stride);
342 }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)343 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
344 inv_txfm_(out, dst, stride);
345 }
346 int version_;
347 vpx_bit_depth_t bit_depth_;
348 int mask_;
349 int eob_;
350 int thresh_;
351
352 InvTxfmFunc ref_txfm_;
353 InvTxfmFunc inv_txfm_;
354 int pitch_;
355
RunInvTrans32x32SpeedTest()356 void RunInvTrans32x32SpeedTest() {
357 ACMRandom rnd(ACMRandom::DeterministicSeed());
358 const int count_test_block = 10000;
359 int64_t c_sum_time = 0;
360 int64_t simd_sum_time = 0;
361 const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
362 DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
363 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
364 DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
365 #if CONFIG_VP9_HIGHBITDEPTH
366 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
367 DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
368 #endif // CONFIG_VP9_HIGHBITDEPTH
369
370 for (int j = 0; j < kNumCoeffs; ++j) {
371 if (j < eob_) {
372 // Random values less than the threshold, either positive or negative
373 coeff[scan[j]] = rnd(thresh_);
374 } else {
375 coeff[scan[j]] = 0;
376 }
377 if (bit_depth_ == VPX_BITS_8) {
378 dst[j] = 0;
379 ref[j] = 0;
380 #if CONFIG_VP9_HIGHBITDEPTH
381 } else {
382 dst16[j] = 0;
383 ref16[j] = 0;
384 #endif // CONFIG_VP9_HIGHBITDEPTH
385 }
386 }
387
388 if (bit_depth_ == VPX_BITS_8) {
389 vpx_usec_timer timer_c;
390 vpx_usec_timer_start(&timer_c);
391 for (int i = 0; i < count_test_block; ++i) {
392 RunRefTxfm(coeff, ref, pitch_);
393 }
394 vpx_usec_timer_mark(&timer_c);
395 c_sum_time += vpx_usec_timer_elapsed(&timer_c);
396
397 vpx_usec_timer timer_mod;
398 vpx_usec_timer_start(&timer_mod);
399 for (int i = 0; i < count_test_block; ++i) {
400 RunInvTxfm(coeff, dst, pitch_);
401 }
402 vpx_usec_timer_mark(&timer_mod);
403 simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
404 } else {
405 #if CONFIG_VP9_HIGHBITDEPTH
406 vpx_usec_timer timer_c;
407 vpx_usec_timer_start(&timer_c);
408 for (int i = 0; i < count_test_block; ++i) {
409 RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
410 }
411 vpx_usec_timer_mark(&timer_c);
412 c_sum_time += vpx_usec_timer_elapsed(&timer_c);
413
414 vpx_usec_timer timer_mod;
415 vpx_usec_timer_start(&timer_mod);
416 for (int i = 0; i < count_test_block; ++i) {
417 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
418 }
419 vpx_usec_timer_mark(&timer_mod);
420 simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
421 #endif // CONFIG_VP9_HIGHBITDEPTH
422 }
423 printf(
424 "c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
425 c_sum_time, simd_sum_time,
426 (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
427 }
428
CompareInvReference32x32()429 void CompareInvReference32x32() {
430 ACMRandom rnd(ACMRandom::DeterministicSeed());
431 const int count_test_block = 10000;
432 const int eob = 31;
433 const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
434 DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
435 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
436 DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
437 #if CONFIG_VP9_HIGHBITDEPTH
438 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
439 DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
440 #endif // CONFIG_VP9_HIGHBITDEPTH
441
442 for (int i = 0; i < count_test_block; ++i) {
443 for (int j = 0; j < kNumCoeffs; ++j) {
444 if (j < eob) {
445 coeff[scan[j]] = rnd.Rand8Extremes();
446 } else {
447 coeff[scan[j]] = 0;
448 }
449 if (bit_depth_ == VPX_BITS_8) {
450 dst[j] = 0;
451 ref[j] = 0;
452 #if CONFIG_VP9_HIGHBITDEPTH
453 } else {
454 dst16[j] = 0;
455 ref16[j] = 0;
456 #endif // CONFIG_VP9_HIGHBITDEPTH
457 }
458 }
459 if (bit_depth_ == VPX_BITS_8) {
460 RunRefTxfm(coeff, ref, pitch_);
461 RunInvTxfm(coeff, dst, pitch_);
462 } else {
463 #if CONFIG_VP9_HIGHBITDEPTH
464 RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
465 ASM_REGISTER_STATE_CHECK(
466 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
467 #endif // CONFIG_VP9_HIGHBITDEPTH
468 }
469
470 for (int j = 0; j < kNumCoeffs; ++j) {
471 #if CONFIG_VP9_HIGHBITDEPTH
472 const uint32_t diff =
473 bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
474 #else
475 const uint32_t diff = dst[j] - ref[j];
476 #endif // CONFIG_VP9_HIGHBITDEPTH
477 const uint32_t error = diff * diff;
478 EXPECT_EQ(0u, error) << "Error: 32x32 IDCT Comparison has error "
479 << error << " at index " << j;
480 }
481 }
482 }
483 };
484
// InvTrans32x32Test is only instantiated in non-high-bitdepth configurations
// below, so tell GoogleTest that having no instantiation is acceptable.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans32x32Test);

// Timing comparison; disabled by default.
TEST_P(InvTrans32x32Test, DISABLED_Speed) {
  RunInvTrans32x32SpeedTest();
}

// Bit-exact comparison against the reference inverse transform.
TEST_P(InvTrans32x32Test, CompareReference) {
  CompareInvReference32x32();
}

// Lets the instantiations below write make_tuple unqualified.
using std::make_tuple;
491
492 #if CONFIG_VP9_HIGHBITDEPTH
493 INSTANTIATE_TEST_SUITE_P(
494 C, Trans32x32Test,
495 ::testing::Values(
496 make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
497 make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
498 make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
499 make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
500 make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
501 make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
502 VPX_BITS_8)));
503 #else
504 INSTANTIATE_TEST_SUITE_P(
505 C, Trans32x32Test,
506 ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
507 VPX_BITS_8),
508 make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
509 1, VPX_BITS_8)));
510
511 INSTANTIATE_TEST_SUITE_P(
512 C, InvTrans32x32Test,
513 ::testing::Values(
514 (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_c, 0,
515 VPX_BITS_8, 32, 6225)),
516 make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_c, 0,
517 VPX_BITS_8, 16, 6255)));
518 #endif // CONFIG_VP9_HIGHBITDEPTH
519
// NEON forward transforms paired with the NEON inverse transform.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_neon, &vpx_idct32x32_1024_add_neon, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_neon, &vpx_idct32x32_1024_add_neon, 1,
                   VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
528
// SSE2, 8-bit only build: forward transforms plus the inverse transforms
// compared against their C references.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_sse2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
545
// SSE2, high-bitdepth build: SSE2 forward transforms paired with the C
// inverse transforms at 10, 12 and 8 bits.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
561
// AVX2, 8-bit only build. The forward tests pair the AVX2 forward transforms
// with the SSE2 inverse; the inverse tests compare AVX2 against C.
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_avx2, &vpx_idct32x32_1024_add_sse2, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_avx2, &vpx_idct32x32_1024_add_sse2, 1,
                   VPX_BITS_8)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, InvTrans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_avx2, 0,
                   VPX_BITS_8, 32, 6225),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_avx2, 0,
                   VPX_BITS_8, 16, 6225)));
#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
578
// MSA forward transforms paired with the MSA inverse transform.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    MSA, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1,
                   VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
587
// VSX inverse transform. The version 0 entry pairs it with the C forward
// transform; the version 1 entry uses the VSX rd forward transform.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    VSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_vsx, &vpx_idct32x32_1024_add_vsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
596
// LSX forward transforms paired with the LSX inverse transform.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
    LSX, Trans32x32Test,
    ::testing::Values(
        make_tuple(&vpx_fdct32x32_lsx, &vpx_idct32x32_1024_add_lsx, 0,
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_lsx, &vpx_idct32x32_1024_add_lsx, 1,
                   VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
605 } // namespace
606