1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <stdlib.h>
12 #include <ostream>
13 #include <string>
14 #include <tuple>
15
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
17
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20
21 #include "aom_ports/aom_timer.h"
22 #include "aom_ports/mem.h"
23 #include "test/acm_random.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
26
27 namespace {
28
29 using libaom_test::ACMRandom;
30
31 template <typename Pixel>
32 class AverageTestBase : public ::testing::Test {
33 public:
AverageTestBase(int width,int height,int bit_depth=8)34 AverageTestBase(int width, int height, int bit_depth = 8)
35 : width_(width), height_(height), source_data_(nullptr),
36 source_stride_(0), bit_depth_(bit_depth) {}
37
TearDown()38 void TearDown() override {
39 aom_free(source_data_);
40 source_data_ = nullptr;
41 }
42
43 protected:
44 // Handle blocks up to 4 blocks 64x64 with stride up to 128
45 static const int kDataAlignment = 16;
46 static const int kDataBlockWidth = 128;
47 static const int kDataBlockHeight = 128;
48 static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
49
SetUp()50 void SetUp() override {
51 const testing::TestInfo *const test_info =
52 testing::UnitTest::GetInstance()->current_test_info();
53 // Skip the speed test for C code as the baseline uses the same function.
54 if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
55 std::string(test_info->name()).find("DISABLED_Speed") !=
56 std::string::npos) {
57 GTEST_SKIP();
58 }
59
60 source_data_ = static_cast<Pixel *>(
61 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
62 ASSERT_NE(source_data_, nullptr);
63 memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
64 source_stride_ = (width_ + 31) & ~31;
65 bit_depth_ = 8;
66 rnd_.Reset(ACMRandom::DeterministicSeed());
67 }
68
69 // Sum Pixels
ReferenceAverage8x8(const Pixel * source,int pitch)70 static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
71 unsigned int average = 0;
72 for (int h = 0; h < 8; ++h) {
73 for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
74 }
75 return (average + 32) >> 6;
76 }
77
ReferenceAverage8x8_quad(const uint8_t * source,int pitch,int x16_idx,int y16_idx,int * avg)78 static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
79 int x16_idx, int y16_idx, int *avg) {
80 for (int k = 0; k < 4; k++) {
81 int average = 0;
82 int x8_idx = x16_idx + ((k & 1) << 3);
83 int y8_idx = y16_idx + ((k >> 1) << 3);
84 for (int h = 0; h < 8; ++h) {
85 for (int w = 0; w < 8; ++w)
86 average += source[(h + y8_idx) * pitch + w + x8_idx];
87 }
88 avg[k] = (average + 32) >> 6;
89 }
90 }
91
ReferenceAverage4x4(const Pixel * source,int pitch)92 static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
93 unsigned int average = 0;
94 for (int h = 0; h < 4; ++h) {
95 for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
96 }
97 return (average + 8) >> 4;
98 }
99
FillConstant(Pixel fill_constant)100 void FillConstant(Pixel fill_constant) {
101 for (int i = 0; i < width_ * height_; ++i) {
102 source_data_[i] = fill_constant;
103 }
104 }
105
FillRandom()106 void FillRandom() {
107 for (int i = 0; i < width_ * height_; ++i) {
108 source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
109 }
110 }
111
112 int width_, height_;
113 Pixel *source_data_;
114 int source_stride_;
115 int bit_depth_;
116
117 ACMRandom rnd_;
118 };
// Signature shared by the single-block averaging kernels under test.
using AverageFunction = unsigned int (*)(const uint8_t *s, int pitch);

// Arguments: width, height, bit_depth, buffer start offset, block size, avg
// function.
using AvgFunc = std::tuple<int, int, int, int, int, AverageFunction>;
124
125 template <typename Pixel>
126 class AverageTest : public AverageTestBase<Pixel>,
127 public ::testing::WithParamInterface<AvgFunc> {
128 public:
AverageTest()129 AverageTest()
130 : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
131
132 protected:
133 using AverageTestBase<Pixel>::source_data_;
134 using AverageTestBase<Pixel>::source_stride_;
135 using AverageTestBase<Pixel>::ReferenceAverage8x8;
136 using AverageTestBase<Pixel>::ReferenceAverage4x4;
137 using AverageTestBase<Pixel>::FillConstant;
138 using AverageTestBase<Pixel>::FillRandom;
139
CheckAverages()140 void CheckAverages() {
141 const int block_size = GET_PARAM(4);
142 unsigned int expected = 0;
143
144 // The reference frame, but not the source frame, may be unaligned for
145 // certain types of searches.
146 const Pixel *const src = source_data_ + GET_PARAM(3);
147 if (block_size == 8) {
148 expected = ReferenceAverage8x8(src, source_stride_);
149 } else if (block_size == 4) {
150 expected = ReferenceAverage4x4(src, source_stride_);
151 }
152
153 aom_usec_timer timer;
154 unsigned int actual;
155 if (sizeof(Pixel) == 2) {
156 #if CONFIG_AV1_HIGHBITDEPTH
157 AverageFunction avg_c =
158 (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
159 // To avoid differences in optimization with the local Reference*()
160 // functions the C implementation is used as a baseline.
161 aom_usec_timer_start(&timer);
162 avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
163 aom_usec_timer_mark(&timer);
164 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
165
166 AverageFunction avg_opt = GET_PARAM(5);
167 API_REGISTER_STATE_CHECK(
168 aom_usec_timer_start(&timer);
169 actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
170 aom_usec_timer_mark(&timer));
171 #endif // CONFIG_AV1_HIGHBITDEPTH
172 } else {
173 ASSERT_EQ(sizeof(Pixel), 1u);
174
175 AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
176 aom_usec_timer_start(&timer);
177 avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
178 aom_usec_timer_mark(&timer);
179 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
180
181 AverageFunction avg_opt = GET_PARAM(5);
182 API_REGISTER_STATE_CHECK(
183 aom_usec_timer_start(&timer);
184 actual =
185 avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
186 aom_usec_timer_mark(&timer));
187 }
188 opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
189
190 EXPECT_EQ(expected, actual);
191 }
192
TestConstantValue(Pixel value)193 void TestConstantValue(Pixel value) {
194 FillConstant(value);
195 CheckAverages();
196 }
197
TestRandom(int iterations=1000)198 void TestRandom(int iterations = 1000) {
199 for (int i = 0; i < iterations; i++) {
200 FillRandom();
201 CheckAverages();
202 }
203 }
204
PrintTimingStats() const205 void PrintTimingStats() const {
206 printf(
207 "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
208 GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
209 static_cast<int>(opt_elapsed_time_),
210 (static_cast<float>(ref_elapsed_time_) /
211 static_cast<float>(opt_elapsed_time_)));
212 }
213
214 int64_t ref_elapsed_time_ = 0;
215 int64_t opt_elapsed_time_ = 0;
216 };
217
// Signature of the kernels that average four 8x8 blocks in one call.
using AverageFunction_8x8_quad = void (*)(const uint8_t *s, int pitch,
                                          int x_idx, int y_idx, int *avg);

// Arguments: width, height, bit_depth, buffer start offset, block size, avg
// function.
using AvgFunc_8x8_quad =
    std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>;
225
226 template <typename Pixel>
227 class AverageTest_8x8_quad
228 : public AverageTestBase<Pixel>,
229 public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
230 public:
AverageTest_8x8_quad()231 AverageTest_8x8_quad()
232 : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
233
234 protected:
235 using AverageTestBase<Pixel>::source_data_;
236 using AverageTestBase<Pixel>::source_stride_;
237 using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
238 using AverageTestBase<Pixel>::FillConstant;
239 using AverageTestBase<Pixel>::FillRandom;
240
CheckAveragesAt(int iterations,int x16_idx,int y16_idx)241 void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
242 ASSERT_EQ(sizeof(Pixel), 1u);
243 const int block_size = GET_PARAM(4);
244 (void)block_size;
245 int expected[4] = { 0 };
246
247 // The reference frame, but not the source frame, may be unaligned for
248 // certain types of searches.
249 const Pixel *const src = source_data_ + GET_PARAM(3);
250 ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
251
252 aom_usec_timer timer;
253 int expected_c[4] = { 0 };
254 int actual[4] = { 0 };
255 AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
256 aom_usec_timer_start(&timer);
257 for (int i = 0; i < iterations; i++) {
258 avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
259 y16_idx, expected_c);
260 }
261 aom_usec_timer_mark(&timer);
262 ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
263
264 AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
265 aom_usec_timer_start(&timer);
266 for (int i = 0; i < iterations; i++) {
267 avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
268 y16_idx, actual);
269 }
270 aom_usec_timer_mark(&timer);
271 opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
272
273 for (int k = 0; k < 4; k++) {
274 EXPECT_EQ(expected[k], actual[k]);
275 EXPECT_EQ(expected_c[k], actual[k]);
276 }
277
278 // Print scaling information only when Speed test is called.
279 if (iterations > 1) {
280 printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
281 static_cast<int>(ref_elapsed_time_),
282 static_cast<int>(opt_elapsed_time_),
283 (static_cast<float>(ref_elapsed_time_) /
284 static_cast<float>(opt_elapsed_time_)));
285 }
286 }
287
CheckAverages()288 void CheckAverages() {
289 for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
290 for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
291 CheckAveragesAt(1, x16_idx, y16_idx);
292 }
293
TestConstantValue(Pixel value)294 void TestConstantValue(Pixel value) {
295 FillConstant(value);
296 CheckAverages();
297 }
298
TestRandom()299 void TestRandom() {
300 FillRandom();
301 CheckAverages();
302 }
303
TestSpeed()304 void TestSpeed() {
305 FillRandom();
306 CheckAveragesAt(1000000, 0, 0);
307 }
308
309 int64_t ref_elapsed_time_ = 0;
310 int64_t opt_elapsed_time_ = 0;
311 };
312
313 using AverageTest8bpp = AverageTest<uint8_t>;
314
TEST_P(AverageTest8bpp,MinValue)315 TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }
316
TEST_P(AverageTest8bpp,MaxValue)317 TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }
318
TEST_P(AverageTest8bpp,Random)319 TEST_P(AverageTest8bpp, Random) { TestRandom(); }
320
TEST_P(AverageTest8bpp,DISABLED_Speed)321 TEST_P(AverageTest8bpp, DISABLED_Speed) {
322 TestRandom(1000000);
323 PrintTimingStats();
324 }
325
326 using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;
327
TEST_P(AvgTest8bpp_avg_8x8_quad,MinValue)328 TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }
329
TEST_P(AvgTest8bpp_avg_8x8_quad,MaxValue)330 TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }
331
TEST_P(AvgTest8bpp_avg_8x8_quad,Random)332 TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }
333
TEST_P(AvgTest8bpp_avg_8x8_quad,DISABLED_Speed)334 TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
335
#if CONFIG_AV1_HIGHBITDEPTH
// 16-bit-sample instantiation of the single-block average fixture.
typedef AverageTest<uint16_t> AverageTestHbd;

// Every sample is zero.
TEST_P(AverageTestHbd, MinValue) {
  TestConstantValue(0);
}

// Every sample is the largest 10-bit value.
TEST_P(AverageTestHbd, MaxValue10bit) {
  TestConstantValue(1023);
}

// Every sample is the largest 12-bit value.
TEST_P(AverageTestHbd, MaxValue12bit) {
  TestConstantValue(4095);
}

// Randomized samples, default iteration count.
TEST_P(AverageTestHbd, Random) {
  TestRandom();
}

// Timing run; only executed when disabled tests are requested.
TEST_P(AverageTestHbd, DISABLED_Speed) {
  const int kSpeedIterations = 1000000;
  TestRandom(kSpeedIterations);
  PrintTimingStats();
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
351
// Signature of the integral-projection row kernels under test.
using IntProRowFunc = void (*)(int16_t *hbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProRowParam = std::tuple<int, int, IntProRowFunc, IntProRowFunc>;
358
359 class IntProRowTest : public AverageTestBase<uint8_t>,
360 public ::testing::WithParamInterface<IntProRowParam> {
361 public:
IntProRowTest()362 IntProRowTest()
363 : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
364 hbuf_c_(nullptr) {
365 asm_func_ = GET_PARAM(2);
366 c_func_ = GET_PARAM(3);
367 }
368
set_norm_factor()369 void set_norm_factor() {
370 if (height_ == 128)
371 norm_factor_ = 6;
372 else if (height_ == 64)
373 norm_factor_ = 5;
374 else if (height_ == 32)
375 norm_factor_ = 4;
376 else if (height_ == 16)
377 norm_factor_ = 3;
378 }
379
380 protected:
SetUp()381 void SetUp() override {
382 source_data_ = static_cast<uint8_t *>(
383 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
384 ASSERT_NE(source_data_, nullptr);
385
386 hbuf_asm_ = static_cast<int16_t *>(
387 aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
388 ASSERT_NE(hbuf_asm_, nullptr);
389 hbuf_c_ = static_cast<int16_t *>(
390 aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
391 ASSERT_NE(hbuf_c_, nullptr);
392 }
393
TearDown()394 void TearDown() override {
395 aom_free(source_data_);
396 source_data_ = nullptr;
397 aom_free(hbuf_c_);
398 hbuf_c_ = nullptr;
399 aom_free(hbuf_asm_);
400 hbuf_asm_ = nullptr;
401 }
402
RunComparison()403 void RunComparison() {
404 set_norm_factor();
405 API_REGISTER_STATE_CHECK(
406 c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
407 API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
408 height_, norm_factor_));
409 EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
410 << "Output mismatch\n";
411 }
412
RunSpeedTest()413 void RunSpeedTest() {
414 const int numIter = 5000000;
415 set_norm_factor();
416 printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
417 numIter);
418 aom_usec_timer c_timer_;
419 aom_usec_timer_start(&c_timer_);
420 for (int i = 0; i < numIter; i++) {
421 c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
422 }
423 aom_usec_timer_mark(&c_timer_);
424
425 aom_usec_timer asm_timer_;
426 aom_usec_timer_start(&asm_timer_);
427
428 for (int i = 0; i < numIter; i++) {
429 asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
430 }
431 aom_usec_timer_mark(&asm_timer_);
432
433 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
434 const int asm_sum_time =
435 static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
436
437 printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
438 asm_sum_time,
439 (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
440
441 EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
442 << "Output mismatch\n";
443 }
444
445 private:
446 IntProRowFunc asm_func_;
447 IntProRowFunc c_func_;
448 int16_t *hbuf_asm_;
449 int16_t *hbuf_c_;
450 int norm_factor_;
451 };
452 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
453
// Signature of the integral-projection column kernels under test.
using IntProColFunc = void (*)(int16_t *vbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProColParam = std::tuple<int, int, IntProColFunc, IntProColFunc>;
460
461 class IntProColTest : public AverageTestBase<uint8_t>,
462 public ::testing::WithParamInterface<IntProColParam> {
463 public:
IntProColTest()464 IntProColTest()
465 : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
466 vbuf_c_(nullptr) {
467 asm_func_ = GET_PARAM(2);
468 c_func_ = GET_PARAM(3);
469 }
470
471 protected:
SetUp()472 void SetUp() override {
473 source_data_ = static_cast<uint8_t *>(
474 aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
475 ASSERT_NE(source_data_, nullptr);
476
477 vbuf_asm_ = static_cast<int16_t *>(
478 aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
479 ASSERT_NE(vbuf_asm_, nullptr);
480 vbuf_c_ = static_cast<int16_t *>(
481 aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
482 ASSERT_NE(vbuf_c_, nullptr);
483 }
484
TearDown()485 void TearDown() override {
486 aom_free(source_data_);
487 source_data_ = nullptr;
488 aom_free(vbuf_c_);
489 vbuf_c_ = nullptr;
490 aom_free(vbuf_asm_);
491 vbuf_asm_ = nullptr;
492 }
493
RunComparison()494 void RunComparison() {
495 int norm_factor_ = 3 + (width_ >> 5);
496 API_REGISTER_STATE_CHECK(
497 c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
498 API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
499 height_, norm_factor_));
500 EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
501 << "Output mismatch\n";
502 }
RunSpeedTest()503 void RunSpeedTest() {
504 const int numIter = 5000000;
505 printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
506 numIter);
507 int norm_factor_ = 3 + (width_ >> 5);
508 aom_usec_timer c_timer_;
509 aom_usec_timer_start(&c_timer_);
510 for (int i = 0; i < numIter; i++) {
511 c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
512 }
513 aom_usec_timer_mark(&c_timer_);
514
515 aom_usec_timer asm_timer_;
516 aom_usec_timer_start(&asm_timer_);
517
518 for (int i = 0; i < numIter; i++) {
519 asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
520 }
521 aom_usec_timer_mark(&asm_timer_);
522
523 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
524 const int asm_sum_time =
525 static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
526
527 printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
528 asm_sum_time,
529 (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
530
531 EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
532 << "Output mismatch\n";
533 }
534
535 private:
536 IntProColFunc asm_func_;
537 IntProColFunc c_func_;
538 int16_t *vbuf_asm_;
539 int16_t *vbuf_c_;
540 };
541 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
542
// Every sample is zero.
TEST_P(IntProRowTest, MinValue) {
  const uint8_t kFill = 0;
  FillConstant(kFill);
  RunComparison();
}

// Every sample is the largest 8-bit value.
TEST_P(IntProRowTest, MaxValue) {
  const uint8_t kFill = 255;
  FillConstant(kFill);
  RunComparison();
}

// Randomized samples.
TEST_P(IntProRowTest, Random) {
  FillRandom();
  RunComparison();
}

// Timing run; only executed when disabled tests are requested.
TEST_P(IntProRowTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
562
// Every sample is zero.
TEST_P(IntProColTest, MinValue) {
  const uint8_t kFill = 0;
  FillConstant(kFill);
  RunComparison();
}

// Every sample is the largest 8-bit value.
TEST_P(IntProColTest, MaxValue) {
  const uint8_t kFill = 255;
  FillConstant(kFill);
  RunComparison();
}

// Randomized samples.
TEST_P(IntProColTest, Random) {
  FillRandom();
  RunComparison();
}

// Timing run; only executed when disabled tests are requested.
TEST_P(IntProColTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
582 class VectorVarTestBase : public ::testing::Test {
583 public:
VectorVarTestBase(int bwl)584 explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
585 VectorVarTestBase() = default;
586 ~VectorVarTestBase() override = default;
587
588 protected:
589 static const int kDataAlignment = 16;
590
SetUp()591 void SetUp() override {
592 width = 4 << m_bwl;
593
594 ref_vector = static_cast<int16_t *>(
595 aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
596 ASSERT_NE(ref_vector, nullptr);
597 src_vector = static_cast<int16_t *>(
598 aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
599 ASSERT_NE(src_vector, nullptr);
600
601 rnd_.Reset(ACMRandom::DeterministicSeed());
602 }
TearDown()603 void TearDown() override {
604 aom_free(ref_vector);
605 ref_vector = nullptr;
606 aom_free(src_vector);
607 src_vector = nullptr;
608 }
609
FillConstant(int16_t fill_constant_ref,int16_t fill_constant_src)610 void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
611 for (int i = 0; i < width; ++i) {
612 ref_vector[i] = fill_constant_ref;
613 src_vector[i] = fill_constant_src;
614 }
615 }
616
FillRandom()617 void FillRandom() {
618 for (int i = 0; i < width; ++i) {
619 ref_vector[i] =
620 rnd_.Rand16() % max_range; // acc. aom_vector_var_c brief.
621 src_vector[i] = rnd_.Rand16() % max_range;
622 }
623 }
624
625 int width;
626 int m_bwl;
627 int16_t *ref_vector;
628 int16_t *src_vector;
629 ACMRandom rnd_;
630
631 static const int max_range = 510;
632 static const int num_random_cmp = 50;
633 };
634
// Signature of the vector-variance kernels under test.
using VectorVarFunc = int (*)(const int16_t *ref, const int16_t *src,
                              const int bwl);

// Params: bwl, first function, second function (used by VectorVarTest as the
// C and SIMD implementations respectively).
using VecVarFunc = std::tuple<int, VectorVarFunc, VectorVarFunc>;
639
640 class VectorVarTest : public VectorVarTestBase,
641 public ::testing::WithParamInterface<VecVarFunc> {
642 public:
VectorVarTest()643 VectorVarTest()
644 : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
645 simd_func(GET_PARAM(2)) {}
646
647 protected:
calcVarC()648 int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
calcVarSIMD()649 int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }
650
651 VectorVarFunc c_func;
652 VectorVarFunc simd_func;
653 };
654 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
655
// ref all zero, src all max_range.
TEST_P(VectorVarTest, MaxVar) {
  FillConstant(0, max_range);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// ref all max_range, src all zero.
TEST_P(VectorVarTest, MaxVarRev) {
  FillConstant(max_range, 0);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Both vectors all zero.
TEST_P(VectorVarTest, ZeroDiff) {
  FillConstant(0, 0);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Both vectors all max_range.
TEST_P(VectorVarTest, ZeroDiff2) {
  FillConstant(max_range, max_range);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}

// Two different constants.
TEST_P(VectorVarTest, Constant) {
  FillConstant(30, 90);
  const int expected = calcVarC();
  const int actual = calcVarSIMD();
  ASSERT_EQ(expected, actual);
}
// Compares both implementations on num_random_cmp freshly randomized inputs,
// stopping at the first mismatch.
TEST_P(VectorVarTest, Random) {
  size_t round = 0;
  while (round < num_random_cmp) {
    FillRandom();
    const int expected = calcVarC();
    const int actual = calcVarSIMD();
    ASSERT_EQ(expected, actual);
    ++round;
  }
}
// Times numIter calls of the C and the SIMD variance function on one random
// input, prints the ratio and checks that both produced the same results.
TEST_P(VectorVarTest, DISABLED_Speed) {
  FillRandom();
  const int numIter = 5000000;
  printf("Width = %d number of iteration is %d \n", width, numIter);

  // The running sums use 64-bit accumulators: adding an int result numIter
  // times can exceed the range of int, and signed overflow is undefined
  // behaviour.
  int64_t sum_c_var = 0;
  int c_var = 0;

  aom_usec_timer c_timer_;
  aom_usec_timer_start(&c_timer_);
  for (int i = 0; i < numIter; i++) {
    c_var = calcVarC();
    sum_c_var += c_var;
  }
  aom_usec_timer_mark(&c_timer_);

  int simd_var = 0;
  int64_t sum_simd_var = 0;
  aom_usec_timer simd_timer_;
  aom_usec_timer_start(&simd_timer_);
  for (int i = 0; i < numIter; i++) {
    simd_var = calcVarSIMD();
    sum_simd_var += simd_var;
  }
  aom_usec_timer_mark(&simd_timer_);

  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
  const int simd_sum_time =
      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));

  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
         simd_sum_time,
         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));

  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
}
731
732 using std::make_tuple;
733
734 INSTANTIATE_TEST_SUITE_P(
735 C, AverageTest8bpp,
736 ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
737 make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));
738
739 INSTANTIATE_TEST_SUITE_P(
740 C, AvgTest8bpp_avg_8x8_quad,
741 ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
742 make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
743 make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
744
#if HAVE_SSE2
// SSE2 single-block average: 8x8 kernels first, then 4x4 kernels.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AverageTest8bpp,
    ::testing::Values(
        // 8x8 block
        make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
        make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
        make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
        // 4x4 block
        make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
        make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
        make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));

// SSE2 quad 8x8 average.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
        make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
        make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));

// SSE2 integral projection, rows. Tuple layout: width, height, asm function,
// c function.
INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));

// SSE2 integral projection, columns.
INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
#endif  // HAVE_SSE2
777
#if HAVE_AVX2
// AVX2 quad 8x8 average.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
        make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
        make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));

// AVX2 integral projection, rows. Tuple layout: width, height, asm function,
// c function.
INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));

// AVX2 integral projection, columns.
INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
#endif  // HAVE_AVX2
801
#if HAVE_NEON
// NEON single-block average: 8x8 kernels first, then 4x4 kernels.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTest8bpp,
    ::testing::Values(
        // 8x8 block
        make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
        make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
        make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
        // 4x4 block
        make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
        make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
        make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));

// NEON integral projection, rows. Tuple layout: width, height, asm function,
// c function.
INSTANTIATE_TEST_SUITE_P(
    NEON, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));

// NEON integral projection, columns.
INSTANTIATE_TEST_SUITE_P(
    NEON, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));

// NEON quad 8x8 average.
INSTANTIATE_TEST_SUITE_P(
    NEON, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(
        make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
        make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
        make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
#endif  // HAVE_NEON
833
#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth C kernels at 10 and 12 bits.
// Tuple layout: width, height, bit_depth, buffer start offset, block size,
// avg function.
INSTANTIATE_TEST_SUITE_P(
    C, AverageTestHbd,
    ::testing::Values(
        // 10-bit
        make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
        make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
        // 12-bit
        make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
        make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));

#if HAVE_NEON
// High bit-depth NEON kernels: 4x4 first, then 8x8, each at 10 and 12 bits.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTestHbd,
    ::testing::Values(
        // 4x4 block, 10-bit
        make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
        make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
        make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
        // 4x4 block, 12-bit
        make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
        make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
        make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
        // 8x8 block, 10-bit
        make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
        make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
        make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
        // 8x8 block, 12-bit
        make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
        make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
        make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
#endif  // HAVE_NEON
#endif  // CONFIG_AV1_HIGHBITDEPTH
859
860 typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
861 typedef int (*SatdLpFunc)(const int16_t *coeffs, int length);
862
// Parameter bundle for the SATD tests: the coefficient count plus the
// reference and SIMD functions to compare.
template <typename SatdFuncType>
struct SatdTestParam {
  SatdTestParam(int s, SatdFuncType f1, SatdFuncType f2)
      : satd_size(s), func_ref(f1), func_simd(f2) {}

  // Streams only the size, so that a parameter is printed in readable form
  // rather than as raw function pointers.
  friend std::ostream &operator<<(std::ostream &os,
                                  const SatdTestParam<SatdFuncType> &param) {
    return os << "satd_size: " << param.satd_size;
  }

  int satd_size;           // Number of coefficients passed to the functions.
  SatdFuncType func_ref;   // Reference implementation.
  SatdFuncType func_simd;  // Implementation under test.
};
875
876 template <typename CoeffType, typename SatdFuncType>
877 class SatdTestBase
878 : public ::testing::Test,
879 public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
880 protected:
SatdTestBase(const SatdTestParam<SatdFuncType> & func_param)881 explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
882 satd_size_ = func_param.satd_size;
883 satd_func_ref_ = func_param.func_ref;
884 satd_func_simd_ = func_param.func_simd;
885 }
SetUp()886 void SetUp() override {
887 rnd_.Reset(ACMRandom::DeterministicSeed());
888 src_ = reinterpret_cast<CoeffType *>(
889 aom_memalign(32, sizeof(*src_) * satd_size_));
890 ASSERT_NE(src_, nullptr);
891 }
TearDown()892 void TearDown() override { aom_free(src_); }
FillConstant(const CoeffType val)893 void FillConstant(const CoeffType val) {
894 for (int i = 0; i < satd_size_; ++i) src_[i] = val;
895 }
FillRandom()896 void FillRandom() {
897 for (int i = 0; i < satd_size_; ++i) {
898 src_[i] = static_cast<int16_t>(rnd_.Rand16());
899 }
900 }
Check(int expected)901 void Check(int expected) {
902 int total_ref;
903 API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
904 EXPECT_EQ(expected, total_ref);
905
906 int total_simd;
907 API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
908 EXPECT_EQ(expected, total_simd);
909 }
RunComparison()910 void RunComparison() {
911 int total_ref;
912 API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
913
914 int total_simd;
915 API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
916
917 EXPECT_EQ(total_ref, total_simd);
918 }
RunSpeedTest()919 void RunSpeedTest() {
920 const int numIter = 500000;
921 printf("size = %d number of iteration is %d \n", satd_size_, numIter);
922
923 int total_ref;
924 aom_usec_timer c_timer_;
925 aom_usec_timer_start(&c_timer_);
926 for (int i = 0; i < numIter; i++) {
927 total_ref = satd_func_ref_(src_, satd_size_);
928 }
929 aom_usec_timer_mark(&c_timer_);
930
931 int total_simd;
932 aom_usec_timer simd_timer_;
933 aom_usec_timer_start(&simd_timer_);
934
935 for (int i = 0; i < numIter; i++) {
936 total_simd = satd_func_simd_(src_, satd_size_);
937 }
938 aom_usec_timer_mark(&simd_timer_);
939
940 const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
941 const int simd_sum_time =
942 static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
943
944 printf(
945 "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
946 simd_sum_time,
947 (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
948
949 EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
950 }
951 int satd_size_;
952
953 private:
954 CoeffType *src_;
955 SatdFuncType satd_func_ref_;
956 SatdFuncType satd_func_simd_;
957 ACMRandom rnd_;
958 };
959
960 class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
961 public:
SatdTest()962 SatdTest() : SatdTestBase(GetParam()) {}
963 };
964
// Every coefficient is set to the negative extreme; both implementations are
// expected to return its magnitude multiplied by the block size.
TEST_P(SatdTest, MinValue) {
  const int kMinCoeff = -524287;
  FillConstant(kMinCoeff);
  Check(-kMinCoeff * satd_size_);
}
// Every coefficient is set to the positive extreme; both implementations are
// expected to return that value multiplied by the block size.
TEST_P(SatdTest, MaxValue) {
  const int kMaxCoeff = 524287;
  FillConstant(kMaxCoeff);
  Check(kMaxCoeff * satd_size_);
}
// Compares both implementations against a hard-coded sum for each supported
// block size. The buffer is filled from the fixture's deterministically
// seeded generator, so the sums are fixed per size.
TEST_P(SatdTest, Random) {
  int golden_sum = 0;
  if (satd_size_ == 16) {
    golden_sum = 205298;
  } else if (satd_size_ == 64) {
    golden_sum = 1113950;
  } else if (satd_size_ == 256) {
    golden_sum = 4268415;
  } else if (satd_size_ == 1024) {
    golden_sum = 16954082;
  } else {
    // FAIL() records a fatal failure and returns from the test body.
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(golden_sum);
}
// On random input, the reference and the implementation under test must
// produce the same result.
TEST_P(SatdTest, Match) {
  this->FillRandom();
  this->RunComparison();
}
// Benchmark on random input. The DISABLED_ prefix keeps it out of normal
// runs; it has to be requested explicitly.
TEST_P(SatdTest, DISABLED_Speed) {
  this->FillRandom();
  this->RunSpeedTest();
}
// Tell gtest that it is acceptable for SatdTest to have no instantiations.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);

// C instantiation: aom_satd_c is supplied as both the reference and the
// function under test, once for each block size (16, 64, 256, 1024).
INSTANTIATE_TEST_SUITE_P(
    C, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
                      SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
1007
// NEON instantiations, compiled only when HAVE_NEON is set.
#if HAVE_NEON
// aom_satd_neon checked against aom_satd_c for each block size.
INSTANTIATE_TEST_SUITE_P(
    NEON, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_neon),
                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_neon),
                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_neon),
                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
                                              &aom_satd_neon)));
// aom_vector_var_neon paired with aom_vector_var_c for the integer
// parameters 2..5 (their meaning is defined by VectorVarTest, declared
// earlier in this file).
INSTANTIATE_TEST_SUITE_P(
    NEON, VectorVarTest,
    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon),
                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon),
                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
#endif  // HAVE_NEON
1023
// SVE instantiation, compiled only when HAVE_SVE is set: aom_vector_var_sve
// paired with aom_vector_var_c for the integer parameters 2..5.
#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, VectorVarTest,
    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve),
                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve),
                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve),
                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve)));
#endif  // HAVE_SVE
1032
// SSE4.1 instantiation, compiled only when HAVE_SSE4_1 is set:
// aom_vector_var_sse4_1 paired with aom_vector_var_c for the integer
// parameters 2..5.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, VectorVarTest,
    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1),
                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1),
                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1),
                      make_tuple(5, &aom_vector_var_c,
                                 &aom_vector_var_sse4_1)));
#endif  // HAVE_SSE4_1
1042
// AVX2 instantiations, compiled only when HAVE_AVX2 is set.
#if HAVE_AVX2
// aom_satd_avx2 checked against aom_satd_c for each block size.
INSTANTIATE_TEST_SUITE_P(
    AVX2, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_avx2),
                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_avx2),
                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_avx2),
                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
                                              &aom_satd_avx2)));

// aom_vector_var_avx2 paired with aom_vector_var_c for the integer
// parameters 2..5.
INSTANTIATE_TEST_SUITE_P(
    AVX2, VectorVarTest,
    ::testing::Values(make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2),
                      make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2),
                      make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2),
                      make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2)));
#endif  // HAVE_AVX2
1059
// SSE2 instantiation, compiled only when HAVE_SSE2 is set: aom_satd_sse2
// checked against aom_satd_c for each block size.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SatdTest,
    ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_sse2),
                      SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_sse2),
                      SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_sse2),
                      SatdTestParam<SatdFunc>(1024, &aom_satd_c,
                                              &aom_satd_sse2)));
#endif  // HAVE_SSE2
1069
1070 class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
1071 public:
SatdLpTest()1072 SatdLpTest() : SatdTestBase(GetParam()) {}
1073 };
1074
// Every coefficient is set to the negative extreme; both implementations are
// expected to return its magnitude multiplied by the block size.
TEST_P(SatdLpTest, MinValue) {
  const int kMinCoeff = -32640;
  FillConstant(kMinCoeff);
  Check(-kMinCoeff * satd_size_);
}
// Every coefficient is set to the positive extreme; both implementations are
// expected to return that value multiplied by the block size.
TEST_P(SatdLpTest, MaxValue) {
  const int kMaxCoeff = 32640;
  FillConstant(kMaxCoeff);
  Check(kMaxCoeff * satd_size_);
}
// Compares both implementations against a hard-coded sum for each supported
// block size. The buffer is filled from the fixture's deterministically
// seeded generator, so the sums are fixed per size.
TEST_P(SatdLpTest, Random) {
  int golden_sum = 0;
  if (satd_size_ == 16) {
    golden_sum = 205298;
  } else if (satd_size_ == 64) {
    golden_sum = 1113950;
  } else if (satd_size_ == 256) {
    golden_sum = 4268415;
  } else if (satd_size_ == 1024) {
    golden_sum = 16954082;
  } else {
    // FAIL() records a fatal failure and returns from the test body.
    FAIL() << "Invalid satd size (" << satd_size_
           << ") valid: 16/64/256/1024";
  }
  FillRandom();
  Check(golden_sum);
}
// On random input, the reference and the implementation under test must
// produce the same result.
TEST_P(SatdLpTest, Match) {
  this->FillRandom();
  this->RunComparison();
}
// Benchmark on random input. The DISABLED_ prefix keeps it out of normal
// runs; it has to be requested explicitly.
TEST_P(SatdLpTest, DISABLED_Speed) {
  this->FillRandom();
  this->RunSpeedTest();
}
// Tell gtest that it is acceptable for SatdLpTest to have no instantiations.
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);

// Always instantiate a C-versus-C case so that SatdLpTest has at least one
// instantiation on every build; this avoids gtest's complaint about a
// parameterized test that is never instantiated. aom_satd_lp_c is supplied as
// both the reference and the function under test.
INSTANTIATE_TEST_SUITE_P(
    C, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
1119
// NEON instantiation, compiled only when HAVE_NEON is set: aom_satd_lp_neon
// checked against aom_satd_lp_c for each block size.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_neon),
        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_neon)));
#endif  // HAVE_NEON
1129
// AVX2 instantiation, compiled only when HAVE_AVX2 is set: aom_satd_lp_avx2
// checked against aom_satd_lp_c for each block size.
#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_avx2),
        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_avx2)));
#endif  // HAVE_AVX2
1139
// SSE2 instantiation, compiled only when HAVE_SSE2 is set: aom_satd_lp_sse2
// checked against aom_satd_lp_c for each block size.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SatdLpTest,
    ::testing::Values(
        SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_sse2),
        SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_sse2)));
#endif  // HAVE_SSE2
1149
1150 } // namespace
1151