/*
 *  Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <ostream>
#include <string>
#include <tuple>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"

namespace {

using libaom_test::ACMRandom;

31 template <typename Pixel>
32 class AverageTestBase : public ::testing::Test {
33  public:
AverageTestBase(int width,int height,int bit_depth=8)34   AverageTestBase(int width, int height, int bit_depth = 8)
35       : width_(width), height_(height), source_data_(nullptr),
36         source_stride_(0), bit_depth_(bit_depth) {}
37 
TearDown()38   void TearDown() override {
39     aom_free(source_data_);
40     source_data_ = nullptr;
41   }
42 
43  protected:
44   // Handle blocks up to 4 blocks 64x64 with stride up to 128
45   static const int kDataAlignment = 16;
46   static const int kDataBlockWidth = 128;
47   static const int kDataBlockHeight = 128;
48   static const int kDataBlockSize = kDataBlockWidth * kDataBlockHeight;
49 
SetUp()50   void SetUp() override {
51     const testing::TestInfo *const test_info =
52         testing::UnitTest::GetInstance()->current_test_info();
53     // Skip the speed test for C code as the baseline uses the same function.
54     if (std::string(test_info->test_suite_name()).find("C/") == 0 &&
55         std::string(test_info->name()).find("DISABLED_Speed") !=
56             std::string::npos) {
57       GTEST_SKIP();
58     }
59 
60     source_data_ = static_cast<Pixel *>(
61         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
62     ASSERT_NE(source_data_, nullptr);
63     memset(source_data_, 0, kDataBlockSize * sizeof(source_data_[0]));
64     source_stride_ = (width_ + 31) & ~31;
65     bit_depth_ = 8;
66     rnd_.Reset(ACMRandom::DeterministicSeed());
67   }
68 
69   // Sum Pixels
ReferenceAverage8x8(const Pixel * source,int pitch)70   static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) {
71     unsigned int average = 0;
72     for (int h = 0; h < 8; ++h) {
73       for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
74     }
75     return (average + 32) >> 6;
76   }
77 
ReferenceAverage8x8_quad(const uint8_t * source,int pitch,int x16_idx,int y16_idx,int * avg)78   static void ReferenceAverage8x8_quad(const uint8_t *source, int pitch,
79                                        int x16_idx, int y16_idx, int *avg) {
80     for (int k = 0; k < 4; k++) {
81       int average = 0;
82       int x8_idx = x16_idx + ((k & 1) << 3);
83       int y8_idx = y16_idx + ((k >> 1) << 3);
84       for (int h = 0; h < 8; ++h) {
85         for (int w = 0; w < 8; ++w)
86           average += source[(h + y8_idx) * pitch + w + x8_idx];
87       }
88       avg[k] = (average + 32) >> 6;
89     }
90   }
91 
ReferenceAverage4x4(const Pixel * source,int pitch)92   static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) {
93     unsigned int average = 0;
94     for (int h = 0; h < 4; ++h) {
95       for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
96     }
97     return (average + 8) >> 4;
98   }
99 
FillConstant(Pixel fill_constant)100   void FillConstant(Pixel fill_constant) {
101     for (int i = 0; i < width_ * height_; ++i) {
102       source_data_[i] = fill_constant;
103     }
104   }
105 
FillRandom()106   void FillRandom() {
107     for (int i = 0; i < width_ * height_; ++i) {
108       source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1);
109     }
110   }
111 
112   int width_, height_;
113   Pixel *source_data_;
114   int source_stride_;
115   int bit_depth_;
116 
117   ACMRandom rnd_;
118 };
// Signature of the block-average functions under test (for example
// aom_avg_8x8_c): takes the block start |s| and the row pitch.
using AverageFunction = unsigned int (*)(const uint8_t *s, int pitch);

// Arguments: width, height, bit_depth, buffer start offset, block size, avg
// function.
using AvgFunc = std::tuple<int, int, int, int, int, AverageFunction>;

125 template <typename Pixel>
126 class AverageTest : public AverageTestBase<Pixel>,
127                     public ::testing::WithParamInterface<AvgFunc> {
128  public:
AverageTest()129   AverageTest()
130       : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
131 
132  protected:
133   using AverageTestBase<Pixel>::source_data_;
134   using AverageTestBase<Pixel>::source_stride_;
135   using AverageTestBase<Pixel>::ReferenceAverage8x8;
136   using AverageTestBase<Pixel>::ReferenceAverage4x4;
137   using AverageTestBase<Pixel>::FillConstant;
138   using AverageTestBase<Pixel>::FillRandom;
139 
CheckAverages()140   void CheckAverages() {
141     const int block_size = GET_PARAM(4);
142     unsigned int expected = 0;
143 
144     // The reference frame, but not the source frame, may be unaligned for
145     // certain types of searches.
146     const Pixel *const src = source_data_ + GET_PARAM(3);
147     if (block_size == 8) {
148       expected = ReferenceAverage8x8(src, source_stride_);
149     } else if (block_size == 4) {
150       expected = ReferenceAverage4x4(src, source_stride_);
151     }
152 
153     aom_usec_timer timer;
154     unsigned int actual;
155     if (sizeof(Pixel) == 2) {
156 #if CONFIG_AV1_HIGHBITDEPTH
157       AverageFunction avg_c =
158           (block_size == 8) ? aom_highbd_avg_8x8_c : aom_highbd_avg_4x4_c;
159       // To avoid differences in optimization with the local Reference*()
160       // functions the C implementation is used as a baseline.
161       aom_usec_timer_start(&timer);
162       avg_c(CONVERT_TO_BYTEPTR(src), source_stride_);
163       aom_usec_timer_mark(&timer);
164       ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
165 
166       AverageFunction avg_opt = GET_PARAM(5);
167       API_REGISTER_STATE_CHECK(
168           aom_usec_timer_start(&timer);
169           actual = avg_opt(CONVERT_TO_BYTEPTR(src), source_stride_);
170           aom_usec_timer_mark(&timer));
171 #endif  // CONFIG_AV1_HIGHBITDEPTH
172     } else {
173       ASSERT_EQ(sizeof(Pixel), 1u);
174 
175       AverageFunction avg_c = (block_size == 8) ? aom_avg_8x8_c : aom_avg_4x4_c;
176       aom_usec_timer_start(&timer);
177       avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_);
178       aom_usec_timer_mark(&timer);
179       ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
180 
181       AverageFunction avg_opt = GET_PARAM(5);
182       API_REGISTER_STATE_CHECK(
183           aom_usec_timer_start(&timer);
184           actual =
185               avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_);
186           aom_usec_timer_mark(&timer));
187     }
188     opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
189 
190     EXPECT_EQ(expected, actual);
191   }
192 
TestConstantValue(Pixel value)193   void TestConstantValue(Pixel value) {
194     FillConstant(value);
195     CheckAverages();
196   }
197 
TestRandom(int iterations=1000)198   void TestRandom(int iterations = 1000) {
199     for (int i = 0; i < iterations; i++) {
200       FillRandom();
201       CheckAverages();
202     }
203   }
204 
PrintTimingStats() const205   void PrintTimingStats() const {
206     printf(
207         "block_size = %d \t ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
208         GET_PARAM(4), static_cast<int>(ref_elapsed_time_),
209         static_cast<int>(opt_elapsed_time_),
210         (static_cast<float>(ref_elapsed_time_) /
211          static_cast<float>(opt_elapsed_time_)));
212   }
213 
214   int64_t ref_elapsed_time_ = 0;
215   int64_t opt_elapsed_time_ = 0;
216 };
217 
// Signature of the quad 8x8 average functions under test (for example
// aom_avg_8x8_quad_c); the four results are written to |avg|.
using AverageFunction_8x8_quad = void (*)(const uint8_t *s, int pitch,
                                          int x_idx, int y_idx, int *avg);

// Arguments: width, height, bit_depth, buffer start offset, block size, avg
// function.
using AvgFunc_8x8_quad =
    std::tuple<int, int, int, int, int, AverageFunction_8x8_quad>;

226 template <typename Pixel>
227 class AverageTest_8x8_quad
228     : public AverageTestBase<Pixel>,
229       public ::testing::WithParamInterface<AvgFunc_8x8_quad> {
230  public:
AverageTest_8x8_quad()231   AverageTest_8x8_quad()
232       : AverageTestBase<Pixel>(GET_PARAM(0), GET_PARAM(1), GET_PARAM(2)) {}
233 
234  protected:
235   using AverageTestBase<Pixel>::source_data_;
236   using AverageTestBase<Pixel>::source_stride_;
237   using AverageTestBase<Pixel>::ReferenceAverage8x8_quad;
238   using AverageTestBase<Pixel>::FillConstant;
239   using AverageTestBase<Pixel>::FillRandom;
240 
CheckAveragesAt(int iterations,int x16_idx,int y16_idx)241   void CheckAveragesAt(int iterations, int x16_idx, int y16_idx) {
242     ASSERT_EQ(sizeof(Pixel), 1u);
243     const int block_size = GET_PARAM(4);
244     (void)block_size;
245     int expected[4] = { 0 };
246 
247     // The reference frame, but not the source frame, may be unaligned for
248     // certain types of searches.
249     const Pixel *const src = source_data_ + GET_PARAM(3);
250     ReferenceAverage8x8_quad(src, source_stride_, x16_idx, y16_idx, expected);
251 
252     aom_usec_timer timer;
253     int expected_c[4] = { 0 };
254     int actual[4] = { 0 };
255     AverageFunction_8x8_quad avg_c = aom_avg_8x8_quad_c;
256     aom_usec_timer_start(&timer);
257     for (int i = 0; i < iterations; i++) {
258       avg_c(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
259             y16_idx, expected_c);
260     }
261     aom_usec_timer_mark(&timer);
262     ref_elapsed_time_ += aom_usec_timer_elapsed(&timer);
263 
264     AverageFunction_8x8_quad avg_opt = GET_PARAM(5);
265     aom_usec_timer_start(&timer);
266     for (int i = 0; i < iterations; i++) {
267       avg_opt(reinterpret_cast<const uint8_t *>(src), source_stride_, x16_idx,
268               y16_idx, actual);
269     }
270     aom_usec_timer_mark(&timer);
271     opt_elapsed_time_ += aom_usec_timer_elapsed(&timer);
272 
273     for (int k = 0; k < 4; k++) {
274       EXPECT_EQ(expected[k], actual[k]);
275       EXPECT_EQ(expected_c[k], actual[k]);
276     }
277 
278     // Print scaling information only when Speed test is called.
279     if (iterations > 1) {
280       printf("ref_time = %d \t simd_time = %d \t Gain = %4.2f\n",
281              static_cast<int>(ref_elapsed_time_),
282              static_cast<int>(opt_elapsed_time_),
283              (static_cast<float>(ref_elapsed_time_) /
284               static_cast<float>(opt_elapsed_time_)));
285     }
286   }
287 
CheckAverages()288   void CheckAverages() {
289     for (int x16_idx = 0; x16_idx < this->kDataBlockWidth / 8; x16_idx += 2)
290       for (int y16_idx = 0; y16_idx < this->kDataBlockHeight / 8; y16_idx += 2)
291         CheckAveragesAt(1, x16_idx, y16_idx);
292   }
293 
TestConstantValue(Pixel value)294   void TestConstantValue(Pixel value) {
295     FillConstant(value);
296     CheckAverages();
297   }
298 
TestRandom()299   void TestRandom() {
300     FillRandom();
301     CheckAverages();
302   }
303 
TestSpeed()304   void TestSpeed() {
305     FillRandom();
306     CheckAveragesAt(1000000, 0, 0);
307   }
308 
309   int64_t ref_elapsed_time_ = 0;
310   int64_t opt_elapsed_time_ = 0;
311 };
312 
313 using AverageTest8bpp = AverageTest<uint8_t>;
314 
TEST_P(AverageTest8bpp,MinValue)315 TEST_P(AverageTest8bpp, MinValue) { TestConstantValue(0); }
316 
TEST_P(AverageTest8bpp,MaxValue)317 TEST_P(AverageTest8bpp, MaxValue) { TestConstantValue(255); }
318 
TEST_P(AverageTest8bpp,Random)319 TEST_P(AverageTest8bpp, Random) { TestRandom(); }
320 
TEST_P(AverageTest8bpp,DISABLED_Speed)321 TEST_P(AverageTest8bpp, DISABLED_Speed) {
322   TestRandom(1000000);
323   PrintTimingStats();
324 }
325 
326 using AvgTest8bpp_avg_8x8_quad = AverageTest_8x8_quad<uint8_t>;
327 
TEST_P(AvgTest8bpp_avg_8x8_quad,MinValue)328 TEST_P(AvgTest8bpp_avg_8x8_quad, MinValue) { TestConstantValue(0); }
329 
TEST_P(AvgTest8bpp_avg_8x8_quad,MaxValue)330 TEST_P(AvgTest8bpp_avg_8x8_quad, MaxValue) { TestConstantValue(255); }
331 
TEST_P(AvgTest8bpp_avg_8x8_quad,Random)332 TEST_P(AvgTest8bpp_avg_8x8_quad, Random) { TestRandom(); }
333 
TEST_P(AvgTest8bpp_avg_8x8_quad,DISABLED_Speed)334 TEST_P(AvgTest8bpp_avg_8x8_quad, DISABLED_Speed) { TestSpeed(); }
335 
#if CONFIG_AV1_HIGHBITDEPTH
// 16-bit-sample instantiation of the average fixture and its tests.
using AverageTestHbd = AverageTest<uint16_t>;

TEST_P(AverageTestHbd, MinValue) { TestConstantValue(0); }

// Largest sample values representable in 10 and 12 bits.
TEST_P(AverageTestHbd, MaxValue10bit) { TestConstantValue(1023); }
TEST_P(AverageTestHbd, MaxValue12bit) { TestConstantValue(4095); }

TEST_P(AverageTestHbd, Random) { TestRandom(); }

// Disabled by default; prints timings after one million random iterations.
TEST_P(AverageTestHbd, DISABLED_Speed) {
  TestRandom(1000000);
  PrintTimingStats();
}
#endif  // CONFIG_AV1_HIGHBITDEPTH

// Signature of the aom_int_pro_row_* functions under test.
using IntProRowFunc = void (*)(int16_t *hbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProRowParam = std::tuple<int, int, IntProRowFunc, IntProRowFunc>;

359 class IntProRowTest : public AverageTestBase<uint8_t>,
360                       public ::testing::WithParamInterface<IntProRowParam> {
361  public:
IntProRowTest()362   IntProRowTest()
363       : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), hbuf_asm_(nullptr),
364         hbuf_c_(nullptr) {
365     asm_func_ = GET_PARAM(2);
366     c_func_ = GET_PARAM(3);
367   }
368 
set_norm_factor()369   void set_norm_factor() {
370     if (height_ == 128)
371       norm_factor_ = 6;
372     else if (height_ == 64)
373       norm_factor_ = 5;
374     else if (height_ == 32)
375       norm_factor_ = 4;
376     else if (height_ == 16)
377       norm_factor_ = 3;
378   }
379 
380  protected:
SetUp()381   void SetUp() override {
382     source_data_ = static_cast<uint8_t *>(
383         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
384     ASSERT_NE(source_data_, nullptr);
385 
386     hbuf_asm_ = static_cast<int16_t *>(
387         aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * width_));
388     ASSERT_NE(hbuf_asm_, nullptr);
389     hbuf_c_ = static_cast<int16_t *>(
390         aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * width_));
391     ASSERT_NE(hbuf_c_, nullptr);
392   }
393 
TearDown()394   void TearDown() override {
395     aom_free(source_data_);
396     source_data_ = nullptr;
397     aom_free(hbuf_c_);
398     hbuf_c_ = nullptr;
399     aom_free(hbuf_asm_);
400     hbuf_asm_ = nullptr;
401   }
402 
RunComparison()403   void RunComparison() {
404     set_norm_factor();
405     API_REGISTER_STATE_CHECK(
406         c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_));
407     API_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, width_, width_,
408                                        height_, norm_factor_));
409     EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
410         << "Output mismatch\n";
411   }
412 
RunSpeedTest()413   void RunSpeedTest() {
414     const int numIter = 5000000;
415     set_norm_factor();
416     printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
417            numIter);
418     aom_usec_timer c_timer_;
419     aom_usec_timer_start(&c_timer_);
420     for (int i = 0; i < numIter; i++) {
421       c_func_(hbuf_c_, source_data_, width_, width_, height_, norm_factor_);
422     }
423     aom_usec_timer_mark(&c_timer_);
424 
425     aom_usec_timer asm_timer_;
426     aom_usec_timer_start(&asm_timer_);
427 
428     for (int i = 0; i < numIter; i++) {
429       asm_func_(hbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
430     }
431     aom_usec_timer_mark(&asm_timer_);
432 
433     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
434     const int asm_sum_time =
435         static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
436 
437     printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
438            asm_sum_time,
439            (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
440 
441     EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * width_))
442         << "Output mismatch\n";
443   }
444 
445  private:
446   IntProRowFunc asm_func_;
447   IntProRowFunc c_func_;
448   int16_t *hbuf_asm_;
449   int16_t *hbuf_c_;
450   int norm_factor_;
451 };
452 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProRowTest);
453 
// Signature of the aom_int_pro_col_* functions under test.
using IntProColFunc = void (*)(int16_t *vbuf, uint8_t const *ref,
                               const int ref_stride, const int width,
                               const int height, int norm_factor);

// Params: width, height, asm function, c function.
using IntProColParam = std::tuple<int, int, IntProColFunc, IntProColFunc>;

461 class IntProColTest : public AverageTestBase<uint8_t>,
462                       public ::testing::WithParamInterface<IntProColParam> {
463  public:
IntProColTest()464   IntProColTest()
465       : AverageTestBase(GET_PARAM(0), GET_PARAM(1)), vbuf_asm_(nullptr),
466         vbuf_c_(nullptr) {
467     asm_func_ = GET_PARAM(2);
468     c_func_ = GET_PARAM(3);
469   }
470 
471  protected:
SetUp()472   void SetUp() override {
473     source_data_ = static_cast<uint8_t *>(
474         aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
475     ASSERT_NE(source_data_, nullptr);
476 
477     vbuf_asm_ = static_cast<int16_t *>(
478         aom_memalign(kDataAlignment, sizeof(*vbuf_asm_) * width_));
479     ASSERT_NE(vbuf_asm_, nullptr);
480     vbuf_c_ = static_cast<int16_t *>(
481         aom_memalign(kDataAlignment, sizeof(*vbuf_c_) * width_));
482     ASSERT_NE(vbuf_c_, nullptr);
483   }
484 
TearDown()485   void TearDown() override {
486     aom_free(source_data_);
487     source_data_ = nullptr;
488     aom_free(vbuf_c_);
489     vbuf_c_ = nullptr;
490     aom_free(vbuf_asm_);
491     vbuf_asm_ = nullptr;
492   }
493 
RunComparison()494   void RunComparison() {
495     int norm_factor_ = 3 + (width_ >> 5);
496     API_REGISTER_STATE_CHECK(
497         c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_));
498     API_REGISTER_STATE_CHECK(asm_func_(vbuf_asm_, source_data_, width_, width_,
499                                        height_, norm_factor_));
500     EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
501         << "Output mismatch\n";
502   }
RunSpeedTest()503   void RunSpeedTest() {
504     const int numIter = 5000000;
505     printf("Blk_Size=%dx%d: number of iteration is %d \n", width_, height_,
506            numIter);
507     int norm_factor_ = 3 + (width_ >> 5);
508     aom_usec_timer c_timer_;
509     aom_usec_timer_start(&c_timer_);
510     for (int i = 0; i < numIter; i++) {
511       c_func_(vbuf_c_, source_data_, width_, width_, height_, norm_factor_);
512     }
513     aom_usec_timer_mark(&c_timer_);
514 
515     aom_usec_timer asm_timer_;
516     aom_usec_timer_start(&asm_timer_);
517 
518     for (int i = 0; i < numIter; i++) {
519       asm_func_(vbuf_asm_, source_data_, width_, width_, height_, norm_factor_);
520     }
521     aom_usec_timer_mark(&asm_timer_);
522 
523     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
524     const int asm_sum_time =
525         static_cast<int>(aom_usec_timer_elapsed(&asm_timer_));
526 
527     printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
528            asm_sum_time,
529            (static_cast<float>(c_sum_time) / static_cast<float>(asm_sum_time)));
530 
531     EXPECT_EQ(0, memcmp(vbuf_c_, vbuf_asm_, sizeof(*vbuf_c_) * height_))
532         << "Output mismatch\n";
533   }
534 
535  private:
536   IntProColFunc asm_func_;
537   IntProColFunc c_func_;
538   int16_t *vbuf_asm_;
539   int16_t *vbuf_c_;
540 };
541 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(IntProColTest);
542 
// IntProRowTest cases: all-zero, all-255 and random input, plus a speed run
// that is disabled by default.
TEST_P(IntProRowTest, MinValue) {
  FillConstant(0);
  RunComparison();
}

TEST_P(IntProRowTest, MaxValue) {
  FillConstant(255);
  RunComparison();
}

TEST_P(IntProRowTest, Random) {
  FillRandom();
  RunComparison();
}

TEST_P(IntProRowTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}

// IntProColTest cases: all-zero, all-255 and random input, plus a speed run
// that is disabled by default.
TEST_P(IntProColTest, MinValue) {
  FillConstant(0);
  RunComparison();
}

TEST_P(IntProColTest, MaxValue) {
  FillConstant(255);
  RunComparison();
}

TEST_P(IntProColTest, Random) {
  FillRandom();
  RunComparison();
}

TEST_P(IntProColTest, DISABLED_Speed) {
  FillRandom();
  RunSpeedTest();
}
582 class VectorVarTestBase : public ::testing::Test {
583  public:
VectorVarTestBase(int bwl)584   explicit VectorVarTestBase(int bwl) { m_bwl = bwl; }
585   VectorVarTestBase() = default;
586   ~VectorVarTestBase() override = default;
587 
588  protected:
589   static const int kDataAlignment = 16;
590 
SetUp()591   void SetUp() override {
592     width = 4 << m_bwl;
593 
594     ref_vector = static_cast<int16_t *>(
595         aom_memalign(kDataAlignment, width * sizeof(ref_vector[0])));
596     ASSERT_NE(ref_vector, nullptr);
597     src_vector = static_cast<int16_t *>(
598         aom_memalign(kDataAlignment, width * sizeof(src_vector[0])));
599     ASSERT_NE(src_vector, nullptr);
600 
601     rnd_.Reset(ACMRandom::DeterministicSeed());
602   }
TearDown()603   void TearDown() override {
604     aom_free(ref_vector);
605     ref_vector = nullptr;
606     aom_free(src_vector);
607     src_vector = nullptr;
608   }
609 
FillConstant(int16_t fill_constant_ref,int16_t fill_constant_src)610   void FillConstant(int16_t fill_constant_ref, int16_t fill_constant_src) {
611     for (int i = 0; i < width; ++i) {
612       ref_vector[i] = fill_constant_ref;
613       src_vector[i] = fill_constant_src;
614     }
615   }
616 
FillRandom()617   void FillRandom() {
618     for (int i = 0; i < width; ++i) {
619       ref_vector[i] =
620           rnd_.Rand16() % max_range;  // acc. aom_vector_var_c brief.
621       src_vector[i] = rnd_.Rand16() % max_range;
622     }
623   }
624 
625   int width;
626   int m_bwl;
627   int16_t *ref_vector;
628   int16_t *src_vector;
629   ACMRandom rnd_;
630 
631   static const int max_range = 510;
632   static const int num_random_cmp = 50;
633 };
634 
// Signature of the aom_vector_var_* functions under test.
using VectorVarFunc = int (*)(const int16_t *ref, const int16_t *src,
                              const int bwl);

// Params: bwl, c function, simd function.
using VecVarFunc = std::tuple<int, VectorVarFunc, VectorVarFunc>;

640 class VectorVarTest : public VectorVarTestBase,
641                       public ::testing::WithParamInterface<VecVarFunc> {
642  public:
VectorVarTest()643   VectorVarTest()
644       : VectorVarTestBase(GET_PARAM(0)), c_func(GET_PARAM(1)),
645         simd_func(GET_PARAM(2)) {}
646 
647  protected:
calcVarC()648   int calcVarC() { return c_func(ref_vector, src_vector, m_bwl); }
calcVarSIMD()649   int calcVarSIMD() { return simd_func(ref_vector, src_vector, m_bwl); }
650 
651   VectorVarFunc c_func;
652   VectorVarFunc simd_func;
653 };
654 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VectorVarTest);
655 
// Each case fills the vectors, runs both implementations and requires equal
// results.
TEST_P(VectorVarTest, MaxVar) {
  FillConstant(0, max_range);
  int c_var = calcVarC();
  int simd_var = calcVarSIMD();
  ASSERT_EQ(c_var, simd_var);
}
TEST_P(VectorVarTest, MaxVarRev) {
  FillConstant(max_range, 0);
  int c_var = calcVarC();
  int simd_var = calcVarSIMD();
  ASSERT_EQ(c_var, simd_var);
}
TEST_P(VectorVarTest, ZeroDiff) {
  FillConstant(0, 0);
  int c_var = calcVarC();
  int simd_var = calcVarSIMD();
  ASSERT_EQ(c_var, simd_var);
}
TEST_P(VectorVarTest, ZeroDiff2) {
  FillConstant(max_range, max_range);
  int c_var = calcVarC();
  int simd_var = calcVarSIMD();
  ASSERT_EQ(c_var, simd_var);
}
TEST_P(VectorVarTest, Constant) {
  FillConstant(30, 90);
  int c_var = calcVarC();
  int simd_var = calcVarSIMD();
  ASSERT_EQ(c_var, simd_var);
}
// Repeats the comparison on num_random_cmp independently generated inputs.
TEST_P(VectorVarTest, Random) {
  for (int i = 0; i < num_random_cmp; i++) {
    FillRandom();
    int c_var = calcVarC();
    int simd_var = calcVarSIMD();
    ASSERT_EQ(c_var, simd_var);
  }
}
// Disabled by default. Times five million calls of each implementation on
// one random input, prints the timings and checks that both the last result
// and the running sums agree.
TEST_P(VectorVarTest, DISABLED_Speed) {
  FillRandom();
  const int numIter = 5000000;
  printf("Width = %d number of iteration is %d \n", width, numIter);

  // 64-bit accumulators: summing an int result five million times in an int
  // could overflow, which is undefined behavior for signed types.
  int64_t sum_c_var = 0;
  int c_var = 0;

  aom_usec_timer c_timer_;
  aom_usec_timer_start(&c_timer_);
  for (int i = 0; i < numIter; i++) {
    c_var = calcVarC();
    sum_c_var += c_var;
  }
  aom_usec_timer_mark(&c_timer_);

  int simd_var = 0;
  int64_t sum_simd_var = 0;
  aom_usec_timer simd_timer_;
  aom_usec_timer_start(&simd_timer_);
  for (int i = 0; i < numIter; i++) {
    simd_var = calcVarSIMD();
    sum_simd_var += simd_var;
  }
  aom_usec_timer_mark(&simd_timer_);

  const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
  const int simd_sum_time =
      static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));

  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
         simd_sum_time,
         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));

  EXPECT_EQ(c_var, simd_var) << "Output mismatch \n";
  EXPECT_EQ(sum_c_var, sum_simd_var) << "Output mismatch \n";
}

732 using std::make_tuple;
733 
734 INSTANTIATE_TEST_SUITE_P(
735     C, AverageTest8bpp,
736     ::testing::Values(make_tuple(16, 16, 8, 1, 8, &aom_avg_8x8_c),
737                       make_tuple(16, 16, 8, 1, 4, &aom_avg_4x4_c)));
738 
739 INSTANTIATE_TEST_SUITE_P(
740     C, AvgTest8bpp_avg_8x8_quad,
741     ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_c),
742                       make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_c),
743                       make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_c)));
744 
#if HAVE_SSE2
// SSE2 instantiations of the average and integral-projection tests.
INSTANTIATE_TEST_SUITE_P(
    SSE2, AverageTest8bpp,
    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_sse2),
                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_sse2),
                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_sse2),
                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_sse2),
                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_sse2),
                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_sse2)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_sse2),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_sse2),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_sse2)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_sse2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_sse2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    SSE2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_sse2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_sse2, &aom_int_pro_col_c)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
// AVX2 instantiations of the quad average and integral-projection tests.
INSTANTIATE_TEST_SUITE_P(
    AVX2, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_avx2),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_avx2),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_avx2)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_avx2, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_avx2, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    AVX2, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_avx2, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_avx2, &aom_int_pro_col_c)));
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON instantiations of the average and integral-projection tests.
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTest8bpp,
    ::testing::Values(make_tuple(16, 16, 8, 0, 8, &aom_avg_8x8_neon),
                      make_tuple(16, 16, 8, 5, 8, &aom_avg_8x8_neon),
                      make_tuple(32, 32, 8, 15, 8, &aom_avg_8x8_neon),
                      make_tuple(16, 16, 8, 0, 4, &aom_avg_4x4_neon),
                      make_tuple(16, 16, 8, 5, 4, &aom_avg_4x4_neon),
                      make_tuple(32, 32, 8, 15, 4, &aom_avg_4x4_neon)));
INSTANTIATE_TEST_SUITE_P(
    NEON, IntProRowTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(32, 32, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(64, 64, &aom_int_pro_row_neon, &aom_int_pro_row_c),
        make_tuple(128, 128, &aom_int_pro_row_neon, &aom_int_pro_row_c)));

INSTANTIATE_TEST_SUITE_P(
    NEON, IntProColTest,
    ::testing::Values(
        make_tuple(16, 16, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(32, 32, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(64, 64, &aom_int_pro_col_neon, &aom_int_pro_col_c),
        make_tuple(128, 128, &aom_int_pro_col_neon, &aom_int_pro_col_c)));

INSTANTIATE_TEST_SUITE_P(
    NEON, AvgTest8bpp_avg_8x8_quad,
    ::testing::Values(make_tuple(16, 16, 8, 0, 16, &aom_avg_8x8_quad_neon),
                      make_tuple(32, 32, 8, 16, 16, &aom_avg_8x8_quad_neon),
                      make_tuple(32, 32, 8, 8, 16, &aom_avg_8x8_quad_neon)));
#endif  // HAVE_NEON

#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth instantiations, at 10 and 12 bits.
INSTANTIATE_TEST_SUITE_P(
    C, AverageTestHbd,
    ::testing::Values(make_tuple(16, 16, 10, 1, 8, &aom_highbd_avg_8x8_c),
                      make_tuple(16, 16, 10, 1, 4, &aom_highbd_avg_4x4_c),
                      make_tuple(16, 16, 12, 1, 8, &aom_highbd_avg_8x8_c),
                      make_tuple(16, 16, 12, 1, 4, &aom_highbd_avg_4x4_c)));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AverageTestHbd,
    ::testing::Values(make_tuple(16, 16, 10, 0, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 10, 5, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(32, 32, 10, 15, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 12, 0, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 12, 5, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(32, 32, 12, 15, 4, &aom_highbd_avg_4x4_neon),
                      make_tuple(16, 16, 10, 0, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 10, 5, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(32, 32, 10, 15, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 12, 0, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(16, 16, 12, 5, 8, &aom_highbd_avg_8x8_neon),
                      make_tuple(32, 32, 12, 15, 8, &aom_highbd_avg_8x8_neon)));
#endif  // HAVE_NEON
#endif  // CONFIG_AV1_HIGHBITDEPTH

860 typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
// Signature of a SATD function that reads `length` int16_t coefficients and
// returns an int.
using SatdLpFunc = int (*)(const int16_t *coeffs, int length);
862 
// Parameter bundle for the SATD tests: the number of coefficients to process
// and the two functions to run over them (reference first, SIMD second).
template <typename SatdFuncType>
struct SatdTestParam {
  SatdTestParam(int size, SatdFuncType ref, SatdFuncType simd)
      : satd_size(size), func_ref(ref), func_simd(simd) {}

  // Streams only the coefficient count; the function pointers are not
  // printed.
  friend std::ostream &operator<<(std::ostream &os,
                                  const SatdTestParam<SatdFuncType> &param) {
    os << "satd_size: " << param.satd_size;
    return os;
  }

  int satd_size;           // Number of coefficients handed to the functions.
  SatdFuncType func_ref;   // Reference implementation.
  SatdFuncType func_simd;  // Implementation under test.
};
875 
876 template <typename CoeffType, typename SatdFuncType>
877 class SatdTestBase
878     : public ::testing::Test,
879       public ::testing::WithParamInterface<SatdTestParam<SatdFuncType>> {
880  protected:
SatdTestBase(const SatdTestParam<SatdFuncType> & func_param)881   explicit SatdTestBase(const SatdTestParam<SatdFuncType> &func_param) {
882     satd_size_ = func_param.satd_size;
883     satd_func_ref_ = func_param.func_ref;
884     satd_func_simd_ = func_param.func_simd;
885   }
SetUp()886   void SetUp() override {
887     rnd_.Reset(ACMRandom::DeterministicSeed());
888     src_ = reinterpret_cast<CoeffType *>(
889         aom_memalign(32, sizeof(*src_) * satd_size_));
890     ASSERT_NE(src_, nullptr);
891   }
TearDown()892   void TearDown() override { aom_free(src_); }
FillConstant(const CoeffType val)893   void FillConstant(const CoeffType val) {
894     for (int i = 0; i < satd_size_; ++i) src_[i] = val;
895   }
FillRandom()896   void FillRandom() {
897     for (int i = 0; i < satd_size_; ++i) {
898       src_[i] = static_cast<int16_t>(rnd_.Rand16());
899     }
900   }
Check(int expected)901   void Check(int expected) {
902     int total_ref;
903     API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
904     EXPECT_EQ(expected, total_ref);
905 
906     int total_simd;
907     API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
908     EXPECT_EQ(expected, total_simd);
909   }
RunComparison()910   void RunComparison() {
911     int total_ref;
912     API_REGISTER_STATE_CHECK(total_ref = satd_func_ref_(src_, satd_size_));
913 
914     int total_simd;
915     API_REGISTER_STATE_CHECK(total_simd = satd_func_simd_(src_, satd_size_));
916 
917     EXPECT_EQ(total_ref, total_simd);
918   }
RunSpeedTest()919   void RunSpeedTest() {
920     const int numIter = 500000;
921     printf("size = %d number of iteration is %d \n", satd_size_, numIter);
922 
923     int total_ref;
924     aom_usec_timer c_timer_;
925     aom_usec_timer_start(&c_timer_);
926     for (int i = 0; i < numIter; i++) {
927       total_ref = satd_func_ref_(src_, satd_size_);
928     }
929     aom_usec_timer_mark(&c_timer_);
930 
931     int total_simd;
932     aom_usec_timer simd_timer_;
933     aom_usec_timer_start(&simd_timer_);
934 
935     for (int i = 0; i < numIter; i++) {
936       total_simd = satd_func_simd_(src_, satd_size_);
937     }
938     aom_usec_timer_mark(&simd_timer_);
939 
940     const int c_sum_time = static_cast<int>(aom_usec_timer_elapsed(&c_timer_));
941     const int simd_sum_time =
942         static_cast<int>(aom_usec_timer_elapsed(&simd_timer_));
943 
944     printf(
945         "c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
946         simd_sum_time,
947         (static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
948 
949     EXPECT_EQ(total_ref, total_simd) << "Output mismatch \n";
950   }
951   int satd_size_;
952 
953  private:
954   CoeffType *src_;
955   SatdFuncType satd_func_ref_;
956   SatdFuncType satd_func_simd_;
957   ACMRandom rnd_;
958 };
959 
960 class SatdTest : public SatdTestBase<tran_low_t, SatdFunc> {
961  public:
SatdTest()962   SatdTest() : SatdTestBase(GetParam()) {}
963 };
964 
TEST_P(SatdTest,MinValue)965 TEST_P(SatdTest, MinValue) {
966   const int kMin = -524287;
967   const int expected = -kMin * satd_size_;
968   FillConstant(kMin);
969   Check(expected);
970 }
TEST_P(SatdTest,MaxValue)971 TEST_P(SatdTest, MaxValue) {
972   const int kMax = 524287;
973   const int expected = kMax * satd_size_;
974   FillConstant(kMax);
975   Check(expected);
976 }
TEST_P(SatdTest,Random)977 TEST_P(SatdTest, Random) {
978   int expected;
979   switch (satd_size_) {
980     case 16: expected = 205298; break;
981     case 64: expected = 1113950; break;
982     case 256: expected = 4268415; break;
983     case 1024: expected = 16954082; break;
984     default:
985       FAIL() << "Invalid satd size (" << satd_size_
986              << ") valid: 16/64/256/1024";
987   }
988   FillRandom();
989   Check(expected);
990 }
TEST_P(SatdTest,Match)991 TEST_P(SatdTest, Match) {
992   FillRandom();
993   RunComparison();
994 }
TEST_P(SatdTest,DISABLED_Speed)995 TEST_P(SatdTest, DISABLED_Speed) {
996   FillRandom();
997   RunSpeedTest();
998 }
999 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdTest);
1000 
1001 INSTANTIATE_TEST_SUITE_P(
1002     C, SatdTest,
1003     ::testing::Values(SatdTestParam<SatdFunc>(16, &aom_satd_c, &aom_satd_c),
1004                       SatdTestParam<SatdFunc>(64, &aom_satd_c, &aom_satd_c),
1005                       SatdTestParam<SatdFunc>(256, &aom_satd_c, &aom_satd_c),
1006                       SatdTestParam<SatdFunc>(1024, &aom_satd_c, &aom_satd_c)));
1007 
#if HAVE_NEON
// aom_satd_neon paired with aom_satd_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdFunc> kSatdParamsNeon[] = {
  { 16, &aom_satd_c, &aom_satd_neon },
  { 64, &aom_satd_c, &aom_satd_neon },
  { 256, &aom_satd_c, &aom_satd_neon },
  { 1024, &aom_satd_c, &aom_satd_neon },
};
INSTANTIATE_TEST_SUITE_P(NEON, SatdTest, ::testing::ValuesIn(kSatdParamsNeon));

// aom_vector_var_neon paired with aom_vector_var_c; the first tuple field
// takes the values 2 through 5.
INSTANTIATE_TEST_SUITE_P(
    NEON, VectorVarTest,
    ::testing::Values(
        make_tuple(2, &aom_vector_var_c, &aom_vector_var_neon),
        make_tuple(3, &aom_vector_var_c, &aom_vector_var_neon),
        make_tuple(4, &aom_vector_var_c, &aom_vector_var_neon),
        make_tuple(5, &aom_vector_var_c, &aom_vector_var_neon)));
#endif  // HAVE_NEON
1023 
#if HAVE_SVE
// aom_vector_var_sve paired with aom_vector_var_c; the first tuple field
// takes the values 2 through 5.
INSTANTIATE_TEST_SUITE_P(
    SVE, VectorVarTest,
    ::testing::Values(
        make_tuple(2, &aom_vector_var_c, &aom_vector_var_sve),
        make_tuple(3, &aom_vector_var_c, &aom_vector_var_sve),
        make_tuple(4, &aom_vector_var_c, &aom_vector_var_sve),
        make_tuple(5, &aom_vector_var_c, &aom_vector_var_sve)));
#endif  // HAVE_SVE
1032 
#if HAVE_SSE4_1
// aom_vector_var_sse4_1 paired with aom_vector_var_c; the first tuple field
// takes the values 2 through 5.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, VectorVarTest,
    ::testing::Values(
        make_tuple(2, &aom_vector_var_c, &aom_vector_var_sse4_1),
        make_tuple(3, &aom_vector_var_c, &aom_vector_var_sse4_1),
        make_tuple(4, &aom_vector_var_c, &aom_vector_var_sse4_1),
        make_tuple(5, &aom_vector_var_c, &aom_vector_var_sse4_1)));
#endif  // HAVE_SSE4_1
1042 
#if HAVE_AVX2
// aom_satd_avx2 paired with aom_satd_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdFunc> kSatdParamsAvx2[] = {
  { 16, &aom_satd_c, &aom_satd_avx2 },
  { 64, &aom_satd_c, &aom_satd_avx2 },
  { 256, &aom_satd_c, &aom_satd_avx2 },
  { 1024, &aom_satd_c, &aom_satd_avx2 },
};
INSTANTIATE_TEST_SUITE_P(AVX2, SatdTest, ::testing::ValuesIn(kSatdParamsAvx2));

// aom_vector_var_avx2 paired with aom_vector_var_c; the first tuple field
// takes the values 2 through 5.
INSTANTIATE_TEST_SUITE_P(
    AVX2, VectorVarTest,
    ::testing::Values(
        make_tuple(2, &aom_vector_var_c, &aom_vector_var_avx2),
        make_tuple(3, &aom_vector_var_c, &aom_vector_var_avx2),
        make_tuple(4, &aom_vector_var_c, &aom_vector_var_avx2),
        make_tuple(5, &aom_vector_var_c, &aom_vector_var_avx2)));
#endif  // HAVE_AVX2
1059 
#if HAVE_SSE2
// aom_satd_sse2 paired with aom_satd_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdFunc> kSatdParamsSse2[] = {
  { 16, &aom_satd_c, &aom_satd_sse2 },
  { 64, &aom_satd_c, &aom_satd_sse2 },
  { 256, &aom_satd_c, &aom_satd_sse2 },
  { 1024, &aom_satd_c, &aom_satd_sse2 },
};
INSTANTIATE_TEST_SUITE_P(SSE2, SatdTest, ::testing::ValuesIn(kSatdParamsSse2));
#endif  // HAVE_SSE2
1069 
1070 class SatdLpTest : public SatdTestBase<int16_t, SatdLpFunc> {
1071  public:
SatdLpTest()1072   SatdLpTest() : SatdTestBase(GetParam()) {}
1073 };
1074 
TEST_P(SatdLpTest,MinValue)1075 TEST_P(SatdLpTest, MinValue) {
1076   const int kMin = -32640;
1077   const int expected = -kMin * satd_size_;
1078   FillConstant(kMin);
1079   Check(expected);
1080 }
TEST_P(SatdLpTest,MaxValue)1081 TEST_P(SatdLpTest, MaxValue) {
1082   const int kMax = 32640;
1083   const int expected = kMax * satd_size_;
1084   FillConstant(kMax);
1085   Check(expected);
1086 }
TEST_P(SatdLpTest,Random)1087 TEST_P(SatdLpTest, Random) {
1088   int expected;
1089   switch (satd_size_) {
1090     case 16: expected = 205298; break;
1091     case 64: expected = 1113950; break;
1092     case 256: expected = 4268415; break;
1093     case 1024: expected = 16954082; break;
1094     default:
1095       FAIL() << "Invalid satd size (" << satd_size_
1096              << ") valid: 16/64/256/1024";
1097   }
1098   FillRandom();
1099   Check(expected);
1100 }
TEST_P(SatdLpTest,Match)1101 TEST_P(SatdLpTest, Match) {
1102   FillRandom();
1103   RunComparison();
1104 }
TEST_P(SatdLpTest,DISABLED_Speed)1105 TEST_P(SatdLpTest, DISABLED_Speed) {
1106   FillRandom();
1107   RunSpeedTest();
1108 }
1109 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SatdLpTest);
1110 
1111 // Add the following c test to avoid gtest uninitialized warning.
1112 INSTANTIATE_TEST_SUITE_P(
1113     C, SatdLpTest,
1114     ::testing::Values(
1115         SatdTestParam<SatdLpFunc>(16, &aom_satd_lp_c, &aom_satd_lp_c),
1116         SatdTestParam<SatdLpFunc>(64, &aom_satd_lp_c, &aom_satd_lp_c),
1117         SatdTestParam<SatdLpFunc>(256, &aom_satd_lp_c, &aom_satd_lp_c),
1118         SatdTestParam<SatdLpFunc>(1024, &aom_satd_lp_c, &aom_satd_lp_c)));
1119 
#if HAVE_NEON
// aom_satd_lp_neon paired with aom_satd_lp_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdLpFunc> kSatdLpParamsNeon[] = {
  { 16, &aom_satd_lp_c, &aom_satd_lp_neon },
  { 64, &aom_satd_lp_c, &aom_satd_lp_neon },
  { 256, &aom_satd_lp_c, &aom_satd_lp_neon },
  { 1024, &aom_satd_lp_c, &aom_satd_lp_neon },
};
INSTANTIATE_TEST_SUITE_P(NEON, SatdLpTest,
                         ::testing::ValuesIn(kSatdLpParamsNeon));
#endif  // HAVE_NEON
1129 
#if HAVE_AVX2
// aom_satd_lp_avx2 paired with aom_satd_lp_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdLpFunc> kSatdLpParamsAvx2[] = {
  { 16, &aom_satd_lp_c, &aom_satd_lp_avx2 },
  { 64, &aom_satd_lp_c, &aom_satd_lp_avx2 },
  { 256, &aom_satd_lp_c, &aom_satd_lp_avx2 },
  { 1024, &aom_satd_lp_c, &aom_satd_lp_avx2 },
};
INSTANTIATE_TEST_SUITE_P(AVX2, SatdLpTest,
                         ::testing::ValuesIn(kSatdLpParamsAvx2));
#endif  // HAVE_AVX2
1139 
#if HAVE_SSE2
// aom_satd_lp_sse2 paired with aom_satd_lp_c for sizes 16, 64, 256 and 1024.
const SatdTestParam<SatdLpFunc> kSatdLpParamsSse2[] = {
  { 16, &aom_satd_lp_c, &aom_satd_lp_sse2 },
  { 64, &aom_satd_lp_c, &aom_satd_lp_sse2 },
  { 256, &aom_satd_lp_c, &aom_satd_lp_sse2 },
  { 1024, &aom_satd_lp_c, &aom_satd_lp_sse2 },
};
INSTANTIATE_TEST_SUITE_P(SSE2, SatdLpTest,
                         ::testing::ValuesIn(kSatdLpParamsSse2));
#endif  // HAVE_SSE2
1149 
1150 }  // namespace
1151