/*
 * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <algorithm>
#include <ostream>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "config/aom_dsp_rtcd.h"

#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"

namespace {

using libaom_test::ACMRandom;

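// Hadamard transform with full-precision (tran_low_t) output.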
using HadamardFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
                              tran_low_t *b);
// Low precision version of Hadamard Transform
using HadamardLPFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
                                int16_t *b);
// Low precision version of Hadamard Transform 8x8 - Dual
using HadamardLP8x8DualFunc = void (*)(const int16_t *a, ptrdiff_t a_stride,
                                       int16_t *b);

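// Reference 1-D 4-point Hadamard butterfly applied down one column of a 4x4
// block (column stride 4). The >> 1 mirrors the scaling used by
// aom_hadamard_4x4.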
template <typename OutputType>
void Hadamard4x4(const OutputType *a, OutputType *out) {
  OutputType b[8];
  for (int i = 0; i < 4; i += 2) {
    b[i + 0] = (a[i * 4] + a[(i + 1) * 4]) >> 1;
    b[i + 1] = (a[i * 4] - a[(i + 1) * 4]) >> 1;
  }

  out[0] = b[0] + b[2];
  out[1] = b[1] + b[3];
  out[2] = b[0] - b[2];
  out[3] = b[1] - b[3];
}

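// Reference 2-D 4x4 Hadamard: column pass, row pass, then a transpose so the
// output layout matches the C and SSE2 implementations.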
template <typename OutputType>
void ReferenceHadamard4x4(const int16_t *a, int a_stride, OutputType *b) {
  OutputType input[16];
  OutputType buf[16];
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      input[i * 4 + j] = static_cast<OutputType>(a[i * a_stride + j]);
    }
  }
  for (int i = 0; i < 4; ++i) Hadamard4x4(input + i, buf + i * 4);
  for (int i = 0; i < 4; ++i) Hadamard4x4(buf + i, b + i * 4);

  // Extra transpose to match C and SSE2 behavior (i.e., aom_hadamard_4x4).
  for (int i = 0; i < 4; i++) {
    for (int j = i + 1; j < 4; j++) {
      OutputType temp = b[j * 4 + i];
      b[j * 4 + i] = b[i * 4 + j];
      b[i * 4 + j] = temp;
    }
  }
}

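// Reference 1-D 8-point Hadamard butterfly applied down one column (column
// stride 8). The permuted output order matches the ordering produced by the
// aom_hadamard_8x8 implementations.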
template <typename OutputType>
void HadamardLoop(const OutputType *a, OutputType *out) {
  OutputType b[8];
  for (int i = 0; i < 8; i += 2) {
    b[i + 0] = a[i * 8] + a[(i + 1) * 8];
    b[i + 1] = a[i * 8] - a[(i + 1) * 8];
  }
  OutputType c[8];
  for (int i = 0; i < 8; i += 4) {
    c[i + 0] = b[i + 0] + b[i + 2];
    c[i + 1] = b[i + 1] + b[i + 3];
    c[i + 2] = b[i + 0] - b[i + 2];
    c[i + 3] = b[i + 1] - b[i + 3];
  }
  out[0] = c[0] + c[4];
  out[7] = c[1] + c[5];
  out[3] = c[2] + c[6];
  out[4] = c[3] + c[7];
  out[2] = c[0] - c[4];
  out[6] = c[1] - c[5];
  out[1] = c[2] - c[6];
  out[5] = c[3] - c[7];
}

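// Reference 2-D 8x8 Hadamard: column pass, row pass, then a transpose to
// match the library's output layout.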
template <typename OutputType>
void ReferenceHadamard8x8(const int16_t *a, int a_stride, OutputType *b) {
  OutputType input[64];
  OutputType buf[64];
  for (int i = 0; i < 8; ++i) {
    for (int j = 0; j < 8; ++j) {
      input[i * 8 + j] = static_cast<OutputType>(a[i * a_stride + j]);
    }
  }
  for (int i = 0; i < 8; ++i) HadamardLoop(input + i, buf + i * 8);
  for (int i = 0; i < 8; ++i) HadamardLoop(buf + i, b + i * 8);

  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_8x8 and
  // aom_hadamard_lp_8x8).
  for (int i = 0; i < 8; i++) {
    for (int j = i + 1; j < 8; j++) {
      OutputType temp = b[j * 8 + i];
      b[j * 8 + i] = b[i * 8 + j];
      b[i * 8 + j] = temp;
    }
  }
}

template <typename OutputType>
void ReferenceHadamard8x8Dual(const int16_t *a, int a_stride, OutputType *b) {
  /* The source is an 8x16 block. The destination is rearranged to 8x16.
   * Input is 9 bit. */
  ReferenceHadamard8x8(a, a_stride, b);
  ReferenceHadamard8x8(a + 8, a_stride, b + 64);
}

template <typename OutputType>
void ReferenceHadamard16x16(const int16_t *a, int a_stride, OutputType *b,
                            bool shift) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit. */
  ReferenceHadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
  ReferenceHadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
  ReferenceHadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
  ReferenceHadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);

  /* Overlay the 8x8 blocks and combine. */
  for (int i = 0; i < 64; ++i) {
    /* 8x8 steps the range up to 15 bits. */
    const OutputType a0 = b[0];
    const OutputType a1 = b[64];
    const OutputType a2 = b[128];
    const OutputType a3 = b[192];

    /* Prevent the result from escaping int16_t. */
    const OutputType b0 = (a0 + a1) >> 1;
    const OutputType b1 = (a0 - a1) >> 1;
    const OutputType b2 = (a2 + a3) >> 1;
    const OutputType b3 = (a2 - a3) >> 1;

    /* Store a 16 bit value. */
    b[0] = b0 + b2;
    b[64] = b1 + b3;
    b[128] = b0 - b2;
    b[192] = b1 - b3;

    ++b;
  }

  if (shift) {
    b -= 64;
    // Extra shift to match aom_hadamard_16x16_c and aom_hadamard_16x16_avx2.
    for (int i = 0; i < 16; i++) {
      for (int j = 0; j < 4; j++) {
        OutputType temp = b[i * 16 + 4 + j];
        b[i * 16 + 4 + j] = b[i * 16 + 8 + j];
        b[i * 16 + 8 + j] = temp;
      }
    }
  }
}

template <typename OutputType>
void ReferenceHadamard32x32(const int16_t *a, int a_stride, OutputType *b,
                            bool shift) {
  ReferenceHadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0, shift);
  ReferenceHadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256, shift);
  ReferenceHadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512, shift);
  ReferenceHadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768, shift);

  for (int i = 0; i < 256; ++i) {
    const OutputType a0 = b[0];
    const OutputType a1 = b[256];
    const OutputType a2 = b[512];
    const OutputType a3 = b[768];

    const OutputType b0 = (a0 + a1) >> 2;
    const OutputType b1 = (a0 - a1) >> 2;
    const OutputType b2 = (a2 + a3) >> 2;
    const OutputType b3 = (a2 - a3) >> 2;

    b[0] = b0 + b2;
    b[256] = b1 + b3;
    b[512] = b0 - b2;
    b[768] = b1 - b3;

    ++b;
  }
}

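// Dispatches to the reference transform that matches the block size of the
// function under test.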
template <typename OutputType>
void ReferenceHadamard(const int16_t *a, int a_stride, OutputType *b, int bw,
                       int bh, bool shift) {
  if (bw == 32 && bh == 32) {
    ReferenceHadamard32x32(a, a_stride, b, shift);
  } else if (bw == 16 && bh == 16) {
    ReferenceHadamard16x16(a, a_stride, b, shift);
  } else if (bw == 8 && bh == 8) {
    ReferenceHadamard8x8(a, a_stride, b);
  } else if (bw == 4 && bh == 4) {
    ReferenceHadamard4x4(a, a_stride, b);
  } else if (bw == 8 && bh == 16) {
    ReferenceHadamard8x8Dual(a, a_stride, b);
  } else {
    GTEST_FAIL() << "Invalid Hadamard transform size " << bw << "x" << bh
                 << std::endl;
  }
}

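// Pairs a Hadamard function pointer with the block dimensions it operates on
// so that the tests can be parameterized over transform sizes.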
template <typename HadamardFuncType>
struct FuncWithSize {
  FuncWithSize(HadamardFuncType f, int bw, int bh)
      : func(f), block_width(bw), block_height(bh) {}
  HadamardFuncType func;
  int block_width;
  int block_height;
};

using HadamardFuncWithSize = FuncWithSize<HadamardFunc>;
using HadamardLPFuncWithSize = FuncWithSize<HadamardLPFunc>;
using HadamardLP8x8DualFuncWithSize = FuncWithSize<HadamardLP8x8DualFunc>;

template <typename OutputType, typename HadamardFuncType>
class HadamardTestBase
    : public ::testing::TestWithParam<FuncWithSize<HadamardFuncType>> {
 public:
  HadamardTestBase(const FuncWithSize<HadamardFuncType> &func_param,
                   bool do_shift) {
    h_func_ = func_param.func;
    bw_ = func_param.block_width;
    bh_ = func_param.block_height;
    shift_ = do_shift;
  }

  virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }

  virtual int16_t Rand() = 0;

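  // Fills the input block with random values, runs the function under test,
  // and requires a bit-exact match against the reference transform.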
  void CompareReferenceRandom() {
    const int kMaxBlockSize = 32 * 32;
    const int block_size_ = bw_ * bh_;

    DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
    DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
    memset(a, 0, sizeof(a));
    memset(b, 0, sizeof(b));

    OutputType b_ref[kMaxBlockSize];
    memset(b_ref, 0, sizeof(b_ref));

    for (int i = 0; i < block_size_; ++i) a[i] = Rand();
    ReferenceHadamard(a, bw_, b_ref, bw_, bh_, shift_);
    API_REGISTER_STATE_CHECK(h_func_(a, bw_, b));
    EXPECT_EQ(memcmp(b, b_ref, sizeof(b)), 0);
  }

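  // Repeats the comparison against the reference for input strides 8, 16,
  // ..., 56 to verify that the stride argument is honored.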
  void VaryStride() {
    const int kMaxBlockSize = 32 * 32;
    const int block_size_ = bw_ * bh_;

    DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
    DECLARE_ALIGNED(16, OutputType, b[kMaxBlockSize]);
    memset(a, 0, sizeof(a));
    for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();

    OutputType b_ref[kMaxBlockSize];
    for (int i = 8; i < 64; i += 8) {
      memset(b, 0, sizeof(b));
      memset(b_ref, 0, sizeof(b_ref));

      ReferenceHadamard(a, i, b_ref, bw_, bh_, shift_);
      API_REGISTER_STATE_CHECK(h_func_(a, i, b));
      EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
    }
  }

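  // Runs the function under test 'times' times on a fixed input and prints
  // the elapsed time in microseconds.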
  void SpeedTest(int times) {
    const int kMaxBlockSize = 32 * 32;
    DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
    DECLARE_ALIGNED(16, OutputType, output[kMaxBlockSize]);
    memset(input, 1, sizeof(input));
    memset(output, 0, sizeof(output));

    aom_usec_timer timer;
    aom_usec_timer_start(&timer);
    for (int i = 0; i < times; ++i) {
      h_func_(input, bw_, output);
    }
    aom_usec_timer_mark(&timer);

    const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
    printf("Hadamard%dx%d[%12d runs]: %d us\n", bw_, bh_, times, elapsed_time);
  }

  ACMRandom rnd_;

 private:
  HadamardFuncType h_func_;
  int bw_;
  int bh_;
  bool shift_;
};

class HadamardLowbdTest : public HadamardTestBase<tran_low_t, HadamardFunc> {
 public:
  HadamardLowbdTest() : HadamardTestBase(GetParam(), /*do_shift=*/true) {}
  virtual int16_t Rand() { return rnd_.Rand9Signed(); }
};

TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }

TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }

TEST_P(HadamardLowbdTest, DISABLED_SpeedTest) { SpeedTest(1000000); }

INSTANTIATE_TEST_SUITE_P(
    C, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_c, 4, 4),
                      HadamardFuncWithSize(&aom_hadamard_8x8_c, 8, 8),
                      HadamardFuncWithSize(&aom_hadamard_16x16_c, 16, 16),
                      HadamardFuncWithSize(&aom_hadamard_32x32_c, 32, 32)));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_4x4_sse2, 4, 4),
                      HadamardFuncWithSize(&aom_hadamard_8x8_sse2, 8, 8),
                      HadamardFuncWithSize(&aom_hadamard_16x16_sse2, 16, 16),
                      HadamardFuncWithSize(&aom_hadamard_32x32_sse2, 32, 32)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_16x16_avx2, 16, 16),
                      HadamardFuncWithSize(&aom_hadamard_32x32_avx2, 32, 32)));
#endif  // HAVE_AVX2

// TODO(aomedia:3314): Disable the NEON unit test for now, since the hadamard
// 16x16 NEON implementation needs modifications to match C/AVX2 behavior.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&aom_hadamard_8x8_neon, 8, 8),
                      HadamardFuncWithSize(&aom_hadamard_16x16_neon, 16, 16)));
#endif  // HAVE_NEON

// Tests for low precision
class HadamardLowbdLPTest : public HadamardTestBase<int16_t, HadamardLPFunc> {
 public:
  HadamardLowbdLPTest() : HadamardTestBase(GetParam(), /*do_shift=*/false) {}
  virtual int16_t Rand() { return rnd_.Rand9Signed(); }
};

TEST_P(HadamardLowbdLPTest, CompareReferenceRandom) {
  CompareReferenceRandom();
}

TEST_P(HadamardLowbdLPTest, VaryStride) { VaryStride(); }

TEST_P(HadamardLowbdLPTest, DISABLED_SpeedTest) { SpeedTest(1000000); }

INSTANTIATE_TEST_SUITE_P(
    C, HadamardLowbdLPTest,
    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_c, 8, 8),
                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_c, 16,
                                             16)));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, HadamardLowbdLPTest,
    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_sse2, 8, 8),
                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_sse2, 16,
                                             16)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLPTest,
                         ::testing::Values(HadamardLPFuncWithSize(
                             &aom_hadamard_lp_16x16_avx2, 16, 16)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, HadamardLowbdLPTest,
    ::testing::Values(HadamardLPFuncWithSize(&aom_hadamard_lp_8x8_neon, 8, 8),
                      HadamardLPFuncWithSize(&aom_hadamard_lp_16x16_neon, 16,
                                             16)));
#endif  // HAVE_NEON

// Tests for 8x8 dual low precision
class HadamardLowbdLP8x8DualTest
    : public HadamardTestBase<int16_t, HadamardLP8x8DualFunc> {
 public:
  HadamardLowbdLP8x8DualTest()
      : HadamardTestBase(GetParam(), /*do_shift=*/false) {}
  virtual int16_t Rand() { return rnd_.Rand9Signed(); }
};

TEST_P(HadamardLowbdLP8x8DualTest, CompareReferenceRandom) {
  CompareReferenceRandom();
}

TEST_P(HadamardLowbdLP8x8DualTest, VaryStride) { VaryStride(); }

TEST_P(HadamardLowbdLP8x8DualTest, DISABLED_SpeedTest) { SpeedTest(1000000); }

INSTANTIATE_TEST_SUITE_P(C, HadamardLowbdLP8x8DualTest,
                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
                             &aom_hadamard_lp_8x8_dual_c, 8, 16)));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, HadamardLowbdLP8x8DualTest,
                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
                             &aom_hadamard_lp_8x8_dual_sse2, 8, 16)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, HadamardLowbdLP8x8DualTest,
                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
                             &aom_hadamard_lp_8x8_dual_avx2, 8, 16)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, HadamardLowbdLP8x8DualTest,
                         ::testing::Values(HadamardLP8x8DualFuncWithSize(
                             &aom_hadamard_lp_8x8_dual_neon, 8, 16)));
#endif  // HAVE_NEON

}  // namespace