1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <tuple>
15
16 #include "third_party/googletest/src/include/gtest/gtest.h"
17
18 #include "./vp9_rtcd.h"
19 #include "./vpx_dsp_rtcd.h"
20 #include "test/acm_random.h"
21 #include "test/clear_system_state.h"
22 #include "test/register_state_check.h"
23 #include "test/util.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_scan.h"
26 #include "vpx/vpx_codec.h"
27 #include "vpx/vpx_integer.h"
28 #include "vpx_ports/mem.h"
29
30 using libvpx_test::ACMRandom;
31
32 namespace {
33
34 const int kNumCoeffs = 64;
35 const double kPi = 3.141592653589793238462643383279502884;
36
37 const int kSignBiasMaxDiff255 = 1500;
38 const int kSignBiasMaxDiff15 = 10000;
39
40 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
41 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
42 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
43 int tx_type);
44 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
45 int tx_type);
46
47 typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
48 typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
49 typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
50
reference_8x8_dct_1d(const double in[8],double out[8])51 void reference_8x8_dct_1d(const double in[8], double out[8]) {
52 const double kInvSqrt2 = 0.707106781186547524400844362104;
53 for (int k = 0; k < 8; k++) {
54 out[k] = 0.0;
55 for (int n = 0; n < 8; n++) {
56 out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
57 }
58 if (k == 0) out[k] = out[k] * kInvSqrt2;
59 }
60 }
61
// Double-precision reference 8x8 forward DCT. reference_8x8_dct_1d is applied
// down every column of |input| and then along every row of the intermediate
// result; each final value is doubled (presumably to line up with the scaling
// of the integer transforms this reference is compared against). Both buffers
// are indexed row-major with a row length of 8.
void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
                          double output[kNumCoeffs]) {
  double line_in[8];
  double line_out[8];

  // Pass 1: column transform, input -> output.
  for (int col = 0; col < 8; ++col) {
    for (int row = 0; row < 8; ++row) line_in[row] = input[row * 8 + col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int row = 0; row < 8; ++row) output[row * 8 + col] = line_out[row];
  }

  // Pass 2: row transform in place on output, followed by the x2 scale.
  for (int row = 0; row < 8; ++row) {
    double *const line = output + row * 8;
    for (int col = 0; col < 8; ++col) line_in[col] = line[col];
    reference_8x8_dct_1d(line_in, line_out);
    for (int col = 0; col < 8; ++col) line[col] = line_out[col] * 2;
  }
}
80
fdct8x8_ref(const int16_t * in,tran_low_t * out,int stride,int)81 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
82 int /*tx_type*/) {
83 vpx_fdct8x8_c(in, out, stride);
84 }
85
// Reference forward hybrid transform used as fwd_txfm_ref by FwdTrans8x8HT:
// forwards all four arguments unchanged to vp9_fht8x8_c.
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
89
90 #if CONFIG_VP9_HIGHBITDEPTH
idct8x8_10(const tran_low_t * in,uint8_t * out,int stride)91 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
92 vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
93 }
94
idct8x8_12(const tran_low_t * in,uint8_t * out,int stride)95 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
96 vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
97 }
98
iht8x8_10(const tran_low_t * in,uint8_t * out,int stride,int tx_type)99 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
100 vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
101 }
102
iht8x8_12(const tran_low_t * in,uint8_t * out,int stride,int tx_type)103 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
104 vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
105 }
106
107 #if HAVE_SSE2
108
idct8x8_12_add_10_c(const tran_low_t * in,uint8_t * out,int stride)109 void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
110 vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
111 }
112
idct8x8_12_add_12_c(const tran_low_t * in,uint8_t * out,int stride)113 void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
114 vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
115 }
116
idct8x8_12_add_10_sse2(const tran_low_t * in,uint8_t * out,int stride)117 void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
118 vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
119 }
120
idct8x8_12_add_12_sse2(const tran_low_t * in,uint8_t * out,int stride)121 void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
122 vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
123 }
124
idct8x8_64_add_10_sse2(const tran_low_t * in,uint8_t * out,int stride)125 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
126 vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
127 }
128
idct8x8_64_add_12_sse2(const tran_low_t * in,uint8_t * out,int stride)129 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
130 vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
131 }
132 #endif // HAVE_SSE2
133 #endif // CONFIG_VP9_HIGHBITDEPTH
134
135 // Visual Studio 2022 (cl.exe) targeting AArch64 with optimizations enabled
136 // produces invalid code in RunExtremalCheck() and RunInvAccuracyCheck().
137 // See:
138 // https://developercommunity.visualstudio.com/t/1770-preview-1:-Misoptimization-for-AR/10369786
139 // TODO(jzern): check the compiler version after a fix for the issue is
140 // released.
141 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
142 #pragma optimize("", off)
143 #endif
144 class FwdTrans8x8TestBase {
145 public:
146 virtual ~FwdTrans8x8TestBase() = default;
147
148 protected:
// Hook implemented by each fixture: runs the forward transform under test on
// the residual block |in| and writes the coefficients to |out|. The checks in
// this class always pass pitch_ as |stride|.
virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
// Hook implemented by each fixture: runs the inverse transform under test on
// the coefficients in |out|, writing into |dst|. For bit depths other than 8
// the checks pass a uint16_t buffer wrapped with CAST_TO_BYTEPTR as |dst|.
virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
151
RunSignBiasCheck()152 void RunSignBiasCheck() {
153 ACMRandom rnd(ACMRandom::DeterministicSeed());
154 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
155 DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
156 int count_sign_block[64][2];
157 const int count_test_block = 100000;
158
159 memset(count_sign_block, 0, sizeof(count_sign_block));
160
161 for (int i = 0; i < count_test_block; ++i) {
162 // Initialize a test block with input range [-255, 255].
163 for (int j = 0; j < 64; ++j) {
164 test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
165 ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
166 }
167 ASM_REGISTER_STATE_CHECK(
168 RunFwdTxfm(test_input_block, test_output_block, pitch_));
169
170 for (int j = 0; j < 64; ++j) {
171 if (test_output_block[j] < 0) {
172 ++count_sign_block[j][0];
173 } else if (test_output_block[j] > 0) {
174 ++count_sign_block[j][1];
175 }
176 }
177 }
178
179 for (int j = 0; j < 64; ++j) {
180 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
181 const int max_diff = kSignBiasMaxDiff255;
182 ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
183 << "Error: 8x8 FDCT/FHT has a sign bias > "
184 << 1. * max_diff / count_test_block * 100 << "%"
185 << " for input range [-255, 255] at index " << j
186 << " count0: " << count_sign_block[j][0]
187 << " count1: " << count_sign_block[j][1] << " diff: " << diff;
188 }
189
190 memset(count_sign_block, 0, sizeof(count_sign_block));
191
192 for (int i = 0; i < count_test_block; ++i) {
193 // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
194 for (int j = 0; j < 64; ++j) {
195 test_input_block[j] =
196 ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
197 }
198 ASM_REGISTER_STATE_CHECK(
199 RunFwdTxfm(test_input_block, test_output_block, pitch_));
200
201 for (int j = 0; j < 64; ++j) {
202 if (test_output_block[j] < 0) {
203 ++count_sign_block[j][0];
204 } else if (test_output_block[j] > 0) {
205 ++count_sign_block[j][1];
206 }
207 }
208 }
209
210 for (int j = 0; j < 64; ++j) {
211 const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
212 const int max_diff = kSignBiasMaxDiff15;
213 ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
214 << "Error: 8x8 FDCT/FHT has a sign bias > "
215 << 1. * max_diff / count_test_block * 100 << "%"
216 << " for input range [-15, 15] at index " << j
217 << " count0: " << count_sign_block[j][0]
218 << " count1: " << count_sign_block[j][1] << " diff: " << diff;
219 }
220 }
221
RunRoundTripErrorCheck()222 void RunRoundTripErrorCheck() {
223 ACMRandom rnd(ACMRandom::DeterministicSeed());
224 int max_error = 0;
225 int total_error = 0;
226 const int count_test_block = 100000;
227 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
228 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
229 DECLARE_ALIGNED(16, uint8_t, dst[64]);
230 DECLARE_ALIGNED(16, uint8_t, src[64]);
231 #if CONFIG_VP9_HIGHBITDEPTH
232 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
233 DECLARE_ALIGNED(16, uint16_t, src16[64]);
234 #endif
235
236 for (int i = 0; i < count_test_block; ++i) {
237 // Initialize a test block with input range [-mask_, mask_].
238 for (int j = 0; j < 64; ++j) {
239 if (bit_depth_ == VPX_BITS_8) {
240 src[j] = rnd.Rand8();
241 dst[j] = rnd.Rand8();
242 test_input_block[j] = src[j] - dst[j];
243 #if CONFIG_VP9_HIGHBITDEPTH
244 } else {
245 src16[j] = rnd.Rand16() & mask_;
246 dst16[j] = rnd.Rand16() & mask_;
247 test_input_block[j] = src16[j] - dst16[j];
248 #endif
249 }
250 }
251
252 ASM_REGISTER_STATE_CHECK(
253 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
254 for (int j = 0; j < 64; ++j) {
255 if (test_temp_block[j] > 0) {
256 test_temp_block[j] += 2;
257 test_temp_block[j] /= 4;
258 test_temp_block[j] *= 4;
259 } else {
260 test_temp_block[j] -= 2;
261 test_temp_block[j] /= 4;
262 test_temp_block[j] *= 4;
263 }
264 }
265 if (bit_depth_ == VPX_BITS_8) {
266 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
267 #if CONFIG_VP9_HIGHBITDEPTH
268 } else {
269 ASM_REGISTER_STATE_CHECK(
270 RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
271 #endif
272 }
273
274 for (int j = 0; j < 64; ++j) {
275 #if CONFIG_VP9_HIGHBITDEPTH
276 const int diff =
277 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
278 #else
279 const int diff = dst[j] - src[j];
280 #endif
281 const int error = diff * diff;
282 if (max_error < error) max_error = error;
283 total_error += error;
284 }
285 }
286
287 ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
288 << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
289 << " roundtrip error > 1";
290
291 ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
292 << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
293 << "error > 1/5 per block";
294 }
295
RunExtremalCheck()296 void RunExtremalCheck() {
297 ACMRandom rnd(ACMRandom::DeterministicSeed());
298 int max_error = 0;
299 int total_error = 0;
300 int total_coeff_error = 0;
301 const int count_test_block = 100000;
302 DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
303 DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
304 DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
305 DECLARE_ALIGNED(16, uint8_t, dst[64]);
306 DECLARE_ALIGNED(16, uint8_t, src[64]);
307 #if CONFIG_VP9_HIGHBITDEPTH
308 DECLARE_ALIGNED(16, uint16_t, dst16[64]);
309 DECLARE_ALIGNED(16, uint16_t, src16[64]);
310 #endif
311
312 for (int i = 0; i < count_test_block; ++i) {
313 // Initialize a test block with input range [-mask_, mask_].
314 for (int j = 0; j < 64; ++j) {
315 if (bit_depth_ == VPX_BITS_8) {
316 if (i == 0) {
317 src[j] = 255;
318 dst[j] = 0;
319 } else if (i == 1) {
320 src[j] = 0;
321 dst[j] = 255;
322 } else {
323 src[j] = rnd.Rand8() % 2 ? 255 : 0;
324 dst[j] = rnd.Rand8() % 2 ? 255 : 0;
325 }
326 test_input_block[j] = src[j] - dst[j];
327 #if CONFIG_VP9_HIGHBITDEPTH
328 } else {
329 if (i == 0) {
330 src16[j] = mask_;
331 dst16[j] = 0;
332 } else if (i == 1) {
333 src16[j] = 0;
334 dst16[j] = mask_;
335 } else {
336 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
337 dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
338 }
339 test_input_block[j] = src16[j] - dst16[j];
340 #endif
341 }
342 }
343
344 ASM_REGISTER_STATE_CHECK(
345 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
346 ASM_REGISTER_STATE_CHECK(
347 fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
348 if (bit_depth_ == VPX_BITS_8) {
349 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
350 #if CONFIG_VP9_HIGHBITDEPTH
351 } else {
352 ASM_REGISTER_STATE_CHECK(
353 RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
354 #endif
355 }
356
357 for (int j = 0; j < 64; ++j) {
358 #if CONFIG_VP9_HIGHBITDEPTH
359 const int diff =
360 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
361 #else
362 const int diff = dst[j] - src[j];
363 #endif
364 const int error = diff * diff;
365 if (max_error < error) max_error = error;
366 total_error += error;
367
368 const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
369 total_coeff_error += abs(coeff_diff);
370 }
371
372 ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
373 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
374 << " an individual roundtrip error > 1";
375
376 ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
377 << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
378 << " roundtrip error > 1/5 per block";
379
380 ASSERT_EQ(0, total_coeff_error)
381 << "Error: Extremal 8x8 FDCT/FHT has"
382 << " overflow issues in the intermediate steps > 1";
383 }
384 }
385
RunInvAccuracyCheck()386 void RunInvAccuracyCheck() {
387 ACMRandom rnd(ACMRandom::DeterministicSeed());
388 const int count_test_block = 1000;
389 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
390 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
391 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
392 DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
393 #if CONFIG_VP9_HIGHBITDEPTH
394 DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
395 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
396 #endif
397
398 for (int i = 0; i < count_test_block; ++i) {
399 double out_r[kNumCoeffs];
400
401 // Initialize a test block with input range [-255, 255].
402 for (int j = 0; j < kNumCoeffs; ++j) {
403 if (bit_depth_ == VPX_BITS_8) {
404 src[j] = rnd.Rand8() % 2 ? 255 : 0;
405 dst[j] = src[j] > 0 ? 0 : 255;
406 in[j] = src[j] - dst[j];
407 #if CONFIG_VP9_HIGHBITDEPTH
408 } else {
409 src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
410 dst16[j] = src16[j] > 0 ? 0 : mask_;
411 in[j] = src16[j] - dst16[j];
412 #endif
413 }
414 }
415
416 reference_8x8_dct_2d(in, out_r);
417 for (int j = 0; j < kNumCoeffs; ++j) {
418 coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
419 }
420
421 if (bit_depth_ == VPX_BITS_8) {
422 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
423 #if CONFIG_VP9_HIGHBITDEPTH
424 } else {
425 ASM_REGISTER_STATE_CHECK(
426 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
427 #endif
428 }
429
430 for (int j = 0; j < kNumCoeffs; ++j) {
431 #if CONFIG_VP9_HIGHBITDEPTH
432 const int diff =
433 bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
434 #else
435 const int diff = dst[j] - src[j];
436 #endif
437 const uint32_t error = diff * diff;
438 ASSERT_GE(1u << 2 * (bit_depth_ - 8), error)
439 << "Error: 8x8 IDCT has error " << error << " at index " << j;
440 }
441 }
442 }
443
RunFwdAccuracyCheck()444 void RunFwdAccuracyCheck() {
445 ACMRandom rnd(ACMRandom::DeterministicSeed());
446 const int count_test_block = 1000;
447 DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
448 DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
449 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
450
451 for (int i = 0; i < count_test_block; ++i) {
452 double out_r[kNumCoeffs];
453
454 // Initialize a test block with input range [-mask_, mask_].
455 for (int j = 0; j < kNumCoeffs; ++j) {
456 in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
457 }
458
459 RunFwdTxfm(in, coeff, pitch_);
460 reference_8x8_dct_2d(in, out_r);
461 for (int j = 0; j < kNumCoeffs; ++j) {
462 coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
463 }
464
465 for (int j = 0; j < kNumCoeffs; ++j) {
466 const int32_t diff = coeff[j] - coeff_r[j];
467 const uint32_t error = diff * diff;
468 ASSERT_GE(9u << 2 * (bit_depth_ - 8), error)
469 << "Error: 8x8 DCT has error " << error << " at index " << j;
470 }
471 }
472 }
473
CompareInvReference(IdctFunc ref_txfm,int thresh)474 void CompareInvReference(IdctFunc ref_txfm, int thresh) {
475 ACMRandom rnd(ACMRandom::DeterministicSeed());
476 const int count_test_block = 10000;
477 const int eob = 12;
478 DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
479 DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
480 DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
481 #if CONFIG_VP9_HIGHBITDEPTH
482 DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
483 DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
484 #endif
485 const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
486
487 for (int i = 0; i < count_test_block; ++i) {
488 for (int j = 0; j < kNumCoeffs; ++j) {
489 if (j < eob) {
490 // Random values less than the threshold, either positive or negative
491 coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
492 } else {
493 coeff[scan[j]] = 0;
494 }
495 if (bit_depth_ == VPX_BITS_8) {
496 dst[j] = 0;
497 ref[j] = 0;
498 #if CONFIG_VP9_HIGHBITDEPTH
499 } else {
500 dst16[j] = 0;
501 ref16[j] = 0;
502 #endif
503 }
504 }
505 if (bit_depth_ == VPX_BITS_8) {
506 ref_txfm(coeff, ref, pitch_);
507 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
508 #if CONFIG_VP9_HIGHBITDEPTH
509 } else {
510 ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
511 ASM_REGISTER_STATE_CHECK(
512 RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
513 #endif
514 }
515
516 for (int j = 0; j < kNumCoeffs; ++j) {
517 #if CONFIG_VP9_HIGHBITDEPTH
518 const int diff =
519 bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
520 #else
521 const int diff = dst[j] - ref[j];
522 #endif
523 const uint32_t error = diff * diff;
524 ASSERT_EQ(0u, error)
525 << "Error: 8x8 IDCT has error " << error << " at index " << j;
526 }
527 }
528 }
529 int pitch_;
530 int tx_type_;
531 FhtFunc fwd_txfm_ref;
532 vpx_bit_depth_t bit_depth_;
533 int mask_;
534 };
535 #if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
536 #pragma optimize("", on)
537 #endif
538
539 class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
540 public ::testing::TestWithParam<Dct8x8Param> {
541 public:
542 ~FwdTrans8x8DCT() override = default;
543
SetUp()544 void SetUp() override {
545 fwd_txfm_ = GET_PARAM(0);
546 inv_txfm_ = GET_PARAM(1);
547 tx_type_ = GET_PARAM(2);
548 pitch_ = 8;
549 fwd_txfm_ref = fdct8x8_ref;
550 bit_depth_ = GET_PARAM(3);
551 mask_ = (1 << bit_depth_) - 1;
552 }
553
TearDown()554 void TearDown() override { libvpx_test::ClearSystemState(); }
555
556 protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)557 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
558 fwd_txfm_(in, out, stride);
559 }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)560 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
561 inv_txfm_(out, dst, stride);
562 }
563
564 FdctFunc fwd_txfm_;
565 IdctFunc inv_txfm_;
566 };
567
// Each test below runs the FwdTrans8x8TestBase check of the same name once
// per FwdTrans8x8DCT parameter tuple.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
577
578 class FwdTrans8x8HT : public FwdTrans8x8TestBase,
579 public ::testing::TestWithParam<Ht8x8Param> {
580 public:
581 ~FwdTrans8x8HT() override = default;
582
SetUp()583 void SetUp() override {
584 fwd_txfm_ = GET_PARAM(0);
585 inv_txfm_ = GET_PARAM(1);
586 tx_type_ = GET_PARAM(2);
587 pitch_ = 8;
588 fwd_txfm_ref = fht8x8_ref;
589 bit_depth_ = GET_PARAM(3);
590 mask_ = (1 << bit_depth_) - 1;
591 }
592
TearDown()593 void TearDown() override { libvpx_test::ClearSystemState(); }
594
595 protected:
RunFwdTxfm(int16_t * in,tran_low_t * out,int stride)596 void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) override {
597 fwd_txfm_(in, out, stride, tx_type_);
598 }
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)599 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
600 inv_txfm_(out, dst, stride, tx_type_);
601 }
602
603 FhtFunc fwd_txfm_;
604 IhtFunc inv_txfm_;
605 };
606
// Each test below runs the FwdTrans8x8TestBase check of the same name once
// per FwdTrans8x8HT parameter tuple. The two accuracy checks, which compare
// against the reference DCT, are not registered for this fixture.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
612
613 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
614 class InvTrans8x8DCT : public FwdTrans8x8TestBase,
615 public ::testing::TestWithParam<Idct8x8Param> {
616 public:
617 ~InvTrans8x8DCT() override = default;
618
SetUp()619 void SetUp() override {
620 ref_txfm_ = GET_PARAM(0);
621 inv_txfm_ = GET_PARAM(1);
622 thresh_ = GET_PARAM(2);
623 pitch_ = 8;
624 bit_depth_ = GET_PARAM(3);
625 mask_ = (1 << bit_depth_) - 1;
626 }
627
TearDown()628 void TearDown() override { libvpx_test::ClearSystemState(); }
629
630 protected:
RunInvTxfm(tran_low_t * out,uint8_t * dst,int stride)631 void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) override {
632 inv_txfm_(out, dst, stride);
633 }
RunFwdTxfm(int16_t *,tran_low_t *,int)634 void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/,
635 int /*stride*/) override {}
636
637 IdctFunc ref_txfm_;
638 IdctFunc inv_txfm_;
639 int thresh_;
640 };
// Tells googletest not to report an error if InvTrans8x8DCT ends up with no
// INSTANTIATE_TEST_SUITE_P in a given build.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans8x8DCT);

// Runs CompareInvReference with the reference function and coefficient
// threshold taken from the test parameter.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
646 #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
647
648 using std::make_tuple;
649
// C implementations of the DCT pair. High-bitdepth builds add 10- and 12-bit
// entries whose inverse goes through the idct8x8_10 / idct8x8_12 wrappers.
// The order of the entries determines the generated test indices.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_SUITE_P(C, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
663
// C implementations of the hybrid transform pair, one entry per tx_type 0-3.
// High-bitdepth builds repeat the four types at 10 and 12 bits through the
// iht8x8_10 / iht8x8_12 wrappers. The 8-bit entries for types 1-3 come last
// in that list; the order determines the generated test indices.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_SUITE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH
689
// NEON: the 8-bit DCT pair is always registered. The hybrid-transform entries
// pair the C forward transform with the NEON inverse and are only registered
// when high bitdepth is disabled.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(NEON, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                      &vpx_idct8x8_64_add_neon,
                                                      0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_SUITE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
706
// SSE2 without high bitdepth: SSE2 forward and inverse functions for the DCT
// pair and for hybrid transform types 0-3, all at 8 bits.
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSE2, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
720
// SSE2 with high bitdepth. The 8-bit entries pair the SSE2 forward transform
// with the C inverse; the 10- and 12-bit DCT entries test the SSE2 inverse
// wrappers against both the C and the SSE2 forward transform.
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
// NOTE(review): the third tuple element is 12 for the high-bitdepth entries
// and 0 elsewhere. FwdTrans8x8DCT stores it in tx_type_, but neither its
// transforms nor fdct8x8_ref use the value, so it has no effect here.
INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 12, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
// Tuple layout here: (reference inverse, inverse under test, coefficient
// threshold, bit depth).
INSTANTIATE_TEST_SUITE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
755
// SSSE3 (x86-64 only, no high bitdepth): SSSE3 forward DCT paired with the
// SSE2 inverse.
#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(SSSE3, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                      &vpx_idct8x8_64_add_sse2,
                                                      0, VPX_BITS_8)));
#endif
763
// MSA without high bitdepth: MSA forward and inverse functions for the DCT
// pair and for hybrid transform types 0-3, all at 8 bits.
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(MSA, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                      &vpx_idct8x8_64_add_msa,
                                                      0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
777
// VSX without high bitdepth: the C forward DCT paired with the VSX inverse.
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(VSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                      &vpx_idct8x8_64_add_vsx,
                                                      0, VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
784
// LSX without high bitdepth: the LSX forward DCT paired with the C inverse.
#if HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(LSX, FwdTrans8x8DCT,
                         ::testing::Values(make_tuple(&vpx_fdct8x8_lsx,
                                                      &vpx_idct8x8_64_add_c, 0,
                                                      VPX_BITS_8)));
#endif  // HAVE_LSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
791 } // namespace
792