• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <array>
13 #include <cstdlib>
14 #include <iostream>
15 #include <string>
16 #include <tuple>
17 
18 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
19 
20 #include "config/aom_config.h"
21 #include "config/av1_rtcd.h"
22 
23 #include "aom_ports/aom_timer.h"
24 #include "av1/common/cdef_block.h"
25 #include "test/acm_random.h"
26 #include "test/register_state_check.h"
27 #include "test/util.h"
28 
29 using libaom_test::ACMRandom;
30 
31 namespace {
32 
33 using CdefFilterBlockFunctions = std::array<cdef_filter_block_func, 4>;
34 
35 typedef std::tuple<CdefFilterBlockFunctions, CdefFilterBlockFunctions,
36                    BLOCK_SIZE, int, int>
37     cdef_dir_param_t;
38 
39 class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
40  public:
~CDEFBlockTest()41   virtual ~CDEFBlockTest() {}
SetUp()42   virtual void SetUp() {
43     cdef = GET_PARAM(0);
44     ref_cdef = GET_PARAM(1);
45     bsize = GET_PARAM(2);
46     boundary = GET_PARAM(3);
47     depth = GET_PARAM(4);
48   }
49 
TearDown()50   virtual void TearDown() {}
51 
52  protected:
53   int bsize;
54   int boundary;
55   int depth;
56   CdefFilterBlockFunctions cdef;
57   CdefFilterBlockFunctions ref_cdef;
58 };
59 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest);
60 
61 typedef CDEFBlockTest CDEFBlockHighbdTest;
62 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest);
63 
64 typedef CDEFBlockTest CDEFSpeedTest;
65 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest);
66 
67 typedef CDEFBlockTest CDEFSpeedHighbdTest;
68 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedHighbdTest);
69 
test_cdef(int bsize,int iterations,CdefFilterBlockFunctions cdef,CdefFilterBlockFunctions ref_cdef,int boundary,int depth)70 int64_t test_cdef(int bsize, int iterations, CdefFilterBlockFunctions cdef,
71                   CdefFilterBlockFunctions ref_cdef, int boundary, int depth) {
72   aom_usec_timer ref_timer;
73   int64_t ref_elapsed_time = 0;
74   const int size = 8;
75   const int ysize = size + 2 * CDEF_VBORDER;
76   ACMRandom rnd(ACMRandom::DeterministicSeed());
77   DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
78   DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
79   DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
80   memset(ref_d, 0, sizeof(ref_d));
81   memset(d, 0, sizeof(d));
82 
83   int error = 0, pristrength = 0, secstrength, dir;
84   int pridamping, secdamping, bits, level, count,
85       errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
86       errpridamping = 0, errsecdamping = 0;
87   unsigned int pos = 0;
88 
89   const int block_width =
90       ((bsize == BLOCK_8X8) || (bsize == BLOCK_8X4)) ? 8 : 4;
91   const int block_height =
92       ((bsize == BLOCK_8X8) || (bsize == BLOCK_4X8)) ? 8 : 4;
93   const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
94   for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
95        pridamping++) {
96     for (secdamping = 3 + depth - 8;
97          secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
98       for (count = 0; count < iterations; count++) {
99         for (level = 0; level < (1 << depth) && !error;
100              level += (2 + 6 * !!boundary) << (depth - 8)) {
101           for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
102             for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
103               s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
104                            (1 << depth) - 1);
105             if (boundary) {
106               if (boundary & 1) {  // Left
107                 for (int i = 0; i < ysize; i++)
108                   for (int j = 0; j < CDEF_HBORDER; j++)
109                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
110               }
111               if (boundary & 2) {  // Right
112                 for (int i = 0; i < ysize; i++)
113                   for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
114                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
115               }
116               if (boundary & 4) {  // Above
117                 for (int i = 0; i < CDEF_VBORDER; i++)
118                   for (int j = 0; j < CDEF_BSTRIDE; j++)
119                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
120               }
121               if (boundary & 8) {  // Below
122                 for (int i = CDEF_VBORDER + size; i < ysize; i++)
123                   for (int j = 0; j < CDEF_BSTRIDE; j++)
124                     s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
125               }
126             }
127             for (dir = 0; dir < 8; dir++) {
128               for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
129                    pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
130                 if (pristrength == 16) pristrength = 19;
131                 for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
132                      secstrength += 1 << (depth - 8)) {
133                   if (secstrength == 3 << (depth - 8)) continue;
134 
135                   const int strength_index =
136                       (secstrength == 0) | ((pristrength == 0) << 1);
137 
138                   aom_usec_timer_start(&ref_timer);
139                   ref_cdef[strength_index](
140                       ref_d, size,
141                       s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
142                       pristrength, secstrength, dir, pridamping, secdamping,
143                       depth - 8, block_width, block_height);
144                   aom_usec_timer_mark(&ref_timer);
145                   ref_elapsed_time += aom_usec_timer_elapsed(&ref_timer);
146                   // If cdef and ref_cdef are the same, we're just testing
147                   // speed
148                   if (cdef[0] != ref_cdef[0])
149                     API_REGISTER_STATE_CHECK(cdef[strength_index](
150                         d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
151                         pristrength, secstrength, dir, pridamping, secdamping,
152                         depth - 8, block_width, block_height));
153                   if (ref_cdef[0] != cdef[0]) {
154                     for (pos = 0; pos < max_pos && !error; pos++) {
155                       error = ref_d[pos] != d[pos];
156                       errdepth = depth;
157                       errpristrength = pristrength;
158                       errsecstrength = secstrength;
159                       errboundary = boundary;
160                       errpridamping = pridamping;
161                       errsecdamping = secdamping;
162                     }
163                   }
164                 }
165               }
166             }
167           }
168         }
169       }
170     }
171   }
172 
173   pos--;
174   EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
175                       << std::endl
176                       << "First error at " << pos % size << "," << pos / size
177                       << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
178                       << ") " << std::endl
179                       << "pristrength: " << errpristrength << std::endl
180                       << "pridamping: " << errpridamping << std::endl
181                       << "secstrength: " << errsecstrength << std::endl
182                       << "secdamping: " << errsecdamping << std::endl
183                       << "depth: " << errdepth << std::endl
184                       << "size: " << bsize << std::endl
185                       << "boundary: " << errboundary << std::endl
186                       << std::endl;
187 
188   return ref_elapsed_time;
189 }
190 
test_cdef_speed(int bsize,int iterations,CdefFilterBlockFunctions cdef,CdefFilterBlockFunctions ref_cdef,int boundary,int depth)191 void test_cdef_speed(int bsize, int iterations, CdefFilterBlockFunctions cdef,
192                      CdefFilterBlockFunctions ref_cdef, int boundary,
193                      int depth) {
194   int64_t ref_elapsed_time =
195       test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
196 
197   int64_t elapsed_time =
198       test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
199 
200   std::cout << "C time: " << ref_elapsed_time << " us" << std::endl
201             << "SIMD time: " << elapsed_time << " us" << std::endl;
202 
203   EXPECT_GT(ref_elapsed_time, elapsed_time)
204       << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
205       << "C time: " << ref_elapsed_time << " us" << std::endl
206       << "SIMD time: " << elapsed_time << " us" << std::endl;
207 }
208 
// Signature of a single-block direction search: returns the direction and
// writes a variance through `var`.
using find_dir_t = int (*)(const uint16_t *img, int stride, int32_t *var,
                           int coeff_shift);

// (function under test, reference function).
using find_dir_param_t = std::tuple<find_dir_t, find_dir_t>;
213 
214 class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
215  public:
~CDEFFindDirTest()216   virtual ~CDEFFindDirTest() {}
SetUp()217   virtual void SetUp() {
218     finddir = GET_PARAM(0);
219     ref_finddir = GET_PARAM(1);
220   }
221 
TearDown()222   virtual void TearDown() {}
223 
224  protected:
225   find_dir_t finddir;
226   find_dir_t ref_finddir;
227 };
228 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest);
229 
230 typedef CDEFFindDirTest CDEFFindDirSpeedTest;
231 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest);
232 
test_finddir(int (* finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift),int (* ref_finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift))233 void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
234                                  int coeff_shift),
235                   int (*ref_finddir)(const uint16_t *img, int stride,
236                                      int32_t *var, int coeff_shift)) {
237   const int size = 8;
238   ACMRandom rnd(ACMRandom::DeterministicSeed());
239   DECLARE_ALIGNED(16, uint16_t, s[size * size]);
240 
241   int error = 0;
242   int depth, bits, level, count, errdepth = 0;
243   int ref_res = 0, res = 0;
244   int32_t ref_var = 0, var = 0;
245 
246   for (depth = 8; depth <= 12 && !error; depth += 2) {
247     for (count = 0; count < 512 && !error; count++) {
248       for (level = 0; level < (1 << depth) && !error;
249            level += 1 << (depth - 8)) {
250         for (bits = 1; bits <= depth && !error; bits++) {
251           for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
252             s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
253                          (1 << depth) - 1);
254           for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
255             ref_res = ref_finddir(s, size, &ref_var, depth - 8);
256           if (finddir != ref_finddir)
257             API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
258           if (ref_finddir != finddir) {
259             if (res != ref_res || var != ref_var) error = 1;
260             errdepth = depth;
261           }
262         }
263       }
264     }
265   }
266 
267   EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
268                       << std::endl
269                       << "return: " << res << " : " << ref_res << std::endl
270                       << "var: " << var << " : " << ref_var << std::endl
271                       << "depth: " << errdepth << std::endl
272                       << std::endl;
273 }
274 
test_finddir_speed(int (* finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift),int (* ref_finddir)(const uint16_t * img,int stride,int32_t * var,int coeff_shift))275 void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
276                                        int32_t *var, int coeff_shift),
277                         int (*ref_finddir)(const uint16_t *img, int stride,
278                                            int32_t *var, int coeff_shift)) {
279   aom_usec_timer ref_timer;
280   aom_usec_timer timer;
281 
282   aom_usec_timer_start(&ref_timer);
283   test_finddir(ref_finddir, ref_finddir);
284   aom_usec_timer_mark(&ref_timer);
285   int64_t ref_elapsed_time = aom_usec_timer_elapsed(&ref_timer);
286 
287   aom_usec_timer_start(&timer);
288   test_finddir(finddir, finddir);
289   aom_usec_timer_mark(&timer);
290   int64_t elapsed_time = aom_usec_timer_elapsed(&timer);
291 
292   EXPECT_GT(ref_elapsed_time, elapsed_time)
293       << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
294       << "C time: " << ref_elapsed_time << " us" << std::endl
295       << "SIMD time: " << elapsed_time << " us" << std::endl;
296 }
297 
// Signature of a two-block direction search: the two directions are written
// through out1/out2 and the two variances through var1/var2.
using find_dir_dual_t = void (*)(const uint16_t *img1, const uint16_t *img2,
                                 int stride, int32_t *var1, int32_t *var2,
                                 int coeff_shift, int *out1, int *out2);

// (function under test, reference function).
using find_dir_dual_param_t = std::tuple<find_dir_dual_t, find_dir_dual_t>;
303 
304 class CDEFFindDirDualTest
305     : public ::testing::TestWithParam<find_dir_dual_param_t> {
306  public:
~CDEFFindDirDualTest()307   virtual ~CDEFFindDirDualTest() {}
SetUp()308   virtual void SetUp() {
309     finddir = GET_PARAM(0);
310     ref_finddir = GET_PARAM(1);
311   }
312 
TearDown()313   virtual void TearDown() {}
314 
315  protected:
316   find_dir_dual_t finddir;
317   find_dir_dual_t ref_finddir;
318 };
319 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualTest);
320 
321 typedef CDEFFindDirDualTest CDEFFindDirDualSpeedTest;
322 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualSpeedTest);
323 
test_finddir_dual(void (* finddir)(const uint16_t * img1,const uint16_t * img2,int stride,int32_t * var1,int32_t * var2,int coeff_shift,int * out1,int * out2),void (* ref_finddir)(const uint16_t * img1,const uint16_t * img2,int stride,int32_t * var1,int32_t * var2,int coeff_shift,int * out1,int * out2))324 void test_finddir_dual(
325     void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
326                     int32_t *var1, int32_t *var2, int coeff_shift, int *out1,
327                     int *out2),
328     void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
329                         int32_t *var1, int32_t *var2, int coeff_shift,
330                         int *out1, int *out2)) {
331   const int size_wd = 16;
332   const int size_ht = 8;
333   ACMRandom rnd(ACMRandom::DeterministicSeed());
334   DECLARE_ALIGNED(16, uint16_t, s[size_ht * size_wd]);
335 
336   int error = 0, errdepth = 0;
337   int32_t ref_var[2] = { 0 };
338   int ref_dir[2] = { 0 };
339   int32_t var[2] = { 0 };
340   int dir[2] = { 0 };
341 
342   for (int depth = 8; depth <= 12 && !error; depth += 2) {
343     for (int count = 0; count < 512 && !error; count++) {
344       for (int level = 0; level < (1 << depth) && !error;
345            level += 1 << (depth - 8)) {
346         for (int bits = 1; bits <= depth && !error; bits++) {
347           for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
348             s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
349                          (1 << depth) - 1);
350           for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
351             ref_finddir(s, s + 8, size_wd, &ref_var[0], &ref_var[1], depth - 8,
352                         &ref_dir[0], &ref_dir[1]);
353           if (finddir != ref_finddir)
354             API_REGISTER_STATE_CHECK(finddir(s, s + 8, size_wd, &var[0],
355                                              &var[1], depth - 8, &dir[0],
356                                              &dir[1]));
357           if (ref_finddir != finddir) {
358             for (int j = 0; j < 2; j++) {
359               if (ref_dir[j] != dir[j] || ref_var[j] != var[j]) error = 1;
360             }
361             errdepth = depth;
362           }
363         }
364       }
365     }
366   }
367 
368   for (int j = 0; j < 2; j++) {
369     EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
370                         << std::endl
371                         << "direction: " << dir[j] << " : " << ref_dir[j]
372                         << std::endl
373                         << "variance: " << var[j] << " : " << ref_var[j]
374                         << std::endl
375                         << "depth: " << errdepth << std::endl
376                         << std::endl;
377   }
378 }
379 
test_finddir_dual_speed(void (* finddir)(const uint16_t * img1,const uint16_t * img2,int stride,int32_t * var1,int32_t * var2,int coeff_shift,int * out1,int * out2),void (* ref_finddir)(const uint16_t * img1,const uint16_t * img2,int stride,int32_t * var1,int32_t * var2,int coeff_shift,int * out1,int * out2))380 void test_finddir_dual_speed(
381     void (*finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
382                     int32_t *var1, int32_t *var2, int coeff_shift, int *out1,
383                     int *out2),
384     void (*ref_finddir)(const uint16_t *img1, const uint16_t *img2, int stride,
385                         int32_t *var1, int32_t *var2, int coeff_shift,
386                         int *out1, int *out2)) {
387   aom_usec_timer ref_timer;
388   aom_usec_timer timer;
389 
390   aom_usec_timer_start(&ref_timer);
391   test_finddir_dual(ref_finddir, ref_finddir);
392   aom_usec_timer_mark(&ref_timer);
393   const double ref_elapsed_time =
394       static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
395 
396   aom_usec_timer_start(&timer);
397   test_finddir_dual(finddir, finddir);
398   aom_usec_timer_mark(&timer);
399   const double elapsed_time =
400       static_cast<double>(aom_usec_timer_elapsed(&timer));
401 
402   printf(
403       "ref_time=%lf \t simd_time=%lf \t "
404       "gain=%lf \n",
405       ref_elapsed_time, elapsed_time, ref_elapsed_time / elapsed_time);
406 }
407 
// Correctness: a single pass of the sweep per parameter combination.
TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
  const int kIterations = 1;
  test_cdef(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}

TEST_P(CDEFBlockHighbdTest, TestSIMDHighbdNoMismatch) {
  const int kIterations = 1;
  test_cdef(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}

// Speed: four passes of the sweep per parameter combination.
TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
  const int kIterations = 4;
  test_cdef_speed(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}

TEST_P(CDEFSpeedHighbdTest, DISABLED_TestSpeed) {
  const int kIterations = 4;
  test_cdef_speed(bsize, kIterations, cdef, ref_cdef, boundary, depth);
}
423 
// Single-block direction search: correctness, then speed.
TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
  test_finddir(/*finddir=*/finddir, /*ref_finddir=*/ref_finddir);
}

TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
  test_finddir_speed(/*finddir=*/finddir, /*ref_finddir=*/ref_finddir);
}

// Two-block direction search: correctness, then speed.
TEST_P(CDEFFindDirDualTest, TestSIMDNoMismatch) {
  test_finddir_dual(/*finddir=*/finddir, /*ref_finddir=*/ref_finddir);
}

TEST_P(CDEFFindDirDualSpeedTest, DISABLED_TestSpeed) {
  test_finddir_dual_speed(/*finddir=*/finddir, /*ref_finddir=*/ref_finddir);
}
439 
440 using std::make_tuple;
441 
442 #if (HAVE_SSE2 || HAVE_SSSE3 || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON)
443 static const CdefFilterBlockFunctions kCdefFilterFuncC[] = {
444   { &cdef_filter_8_0_c, &cdef_filter_8_1_c, &cdef_filter_8_2_c,
445     &cdef_filter_8_3_c }
446 };
447 
448 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncC[] = {
449   { &cdef_filter_16_0_c, &cdef_filter_16_0_c, &cdef_filter_16_0_c,
450     &cdef_filter_16_0_c }
451 };
452 #endif
453 
#if HAVE_SSE2
// SSE2 filters, one per strength index used by test_cdef().
static const CdefFilterBlockFunctions kCdefFilterFuncSse2[] = {
  { cdef_filter_8_0_sse2, cdef_filter_8_1_sse2, cdef_filter_8_2_sse2,
    cdef_filter_8_3_sse2 }
};

static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse2[] = {
  { cdef_filter_16_0_sse2, cdef_filter_16_1_sse2, cdef_filter_16_2_sse2,
    cdef_filter_16_3_sse2 }
};

// 8-bit: every block size and boundary mask at depth 8.
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFBlockTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
// High bitdepth: every block size and boundary mask at depths 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFBlockHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirTest,
                         ::testing::Values(make_tuple(cdef_find_dir_sse2,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_sse2,
                                                      cdef_find_dir_dual_c)));
#endif
486 
#if HAVE_SSSE3
// SSSE3 filters, one per strength index used by test_cdef().
static const CdefFilterBlockFunctions kCdefFilterFuncSsse3[] = {
  { cdef_filter_8_0_ssse3, cdef_filter_8_1_ssse3, cdef_filter_8_2_ssse3,
    cdef_filter_8_3_ssse3 }
};

static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSsse3[] = {
  { cdef_filter_16_0_ssse3, cdef_filter_16_1_ssse3, cdef_filter_16_2_ssse3,
    cdef_filter_16_3_ssse3 }
};

// 8-bit: every block size and boundary mask at depth 8.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFBlockTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
// High bitdepth: every block size and boundary mask at depths 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFBlockHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10, 12)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest,
                         ::testing::Values(make_tuple(cdef_find_dir_ssse3,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_ssse3,
                                                      cdef_find_dir_dual_c)));
#endif
519 
#if HAVE_SSE4_1
// SSE4.1 filters, one per strength index used by test_cdef().
static const CdefFilterBlockFunctions kCdefFilterFuncSse4_1[] = {
  { cdef_filter_8_0_sse4_1, cdef_filter_8_1_sse4_1, cdef_filter_8_2_sse4_1,
    cdef_filter_8_3_sse4_1 }
};

static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse4_1[] = {
  { cdef_filter_16_0_sse4_1, cdef_filter_16_1_sse4_1, cdef_filter_16_2_sse4_1,
    cdef_filter_16_3_sse4_1 }
};

// 8-bit: every block size and boundary mask at depth 8.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFBlockTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
// High bitdepth: every block size and boundary mask at depths 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFBlockHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10, 12)));
INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest,
                         ::testing::Values(make_tuple(cdef_find_dir_sse4_1,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFFindDirDualTest,
    ::testing::Values(make_tuple(cdef_find_dir_dual_sse4_1,
                                 cdef_find_dir_dual_c)));
#endif
553 
#if HAVE_AVX2
// AVX2 filters, one per strength index used by test_cdef().
static const CdefFilterBlockFunctions kCdefFilterFuncAvx2[] = {
  { cdef_filter_8_0_avx2, cdef_filter_8_1_avx2, cdef_filter_8_2_avx2,
    cdef_filter_8_3_avx2 }
};

static const CdefFilterBlockFunctions kCdefFilterHighbdFuncAvx2[] = {
  { cdef_filter_16_0_avx2, cdef_filter_16_1_avx2, cdef_filter_16_2_avx2,
    cdef_filter_16_3_avx2 }
};

// 8-bit: every block size and boundary mask at depth 8.
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFBlockTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
// High bitdepth: every block size and boundary mask at depths 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFBlockHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10, 12)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest,
                         ::testing::Values(make_tuple(cdef_find_dir_avx2,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_avx2,
                                                      cdef_find_dir_dual_c)));
#endif
586 
#if HAVE_NEON
// NEON filters, one per strength index used by test_cdef().
static const CdefFilterBlockFunctions kCdefFilterFuncNeon[] = {
  { cdef_filter_8_0_neon, cdef_filter_8_1_neon, cdef_filter_8_2_neon,
    cdef_filter_8_3_neon }
};

static const CdefFilterBlockFunctions kCdefFilterHighbdFuncNeon[] = {
  { cdef_filter_16_0_neon, cdef_filter_16_1_neon, cdef_filter_16_2_neon,
    cdef_filter_16_3_neon }
};

// 8-bit: every block size and boundary mask at depth 8.
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFBlockTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
// High bitdepth: every block size and boundary mask at depths 10 and 12.
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFBlockHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10, 12)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest,
                         ::testing::Values(make_tuple(cdef_find_dir_neon,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_neon,
                                                      cdef_find_dir_dual_c)));
#endif
619 
620 // Test speed for all supported architectures
#if HAVE_SSE2
// SSE2 speed suites: depth 8 for the 8-bit filters, depth 10 for the
// high-bitdepth filters.
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFSpeedTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse2),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
INSTANTIATE_TEST_SUITE_P(
    SSE2, CDEFSpeedHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse2),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_sse2,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirDualSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_sse2,
                                                      cdef_find_dir_dual_c)));
#endif
643 
#if HAVE_SSSE3
// SSSE3 speed suites: depth 8 for the 8-bit filters, depth 10 for the
// high-bitdepth filters.
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFSpeedTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
INSTANTIATE_TEST_SUITE_P(
    SSSE3, CDEFSpeedHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_ssse3,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirDualSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_ssse3,
                                                      cdef_find_dir_dual_c)));
#endif
666 
#if HAVE_SSE4_1
// SSE4.1 speed suites: depth 8 for the 8-bit filters, depth 10 for the
// high-bitdepth filters.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFSpeedTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFSpeedHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10)));
INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_sse4_1,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, CDEFFindDirDualSpeedTest,
    ::testing::Values(make_tuple(cdef_find_dir_dual_sse4_1,
                                 cdef_find_dir_dual_c)));
#endif
690 
#if HAVE_AVX2
// AVX2 speed suites: depth 8 for the 8-bit filters, depth 10 for the
// high-bitdepth filters.
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFSpeedTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
INSTANTIATE_TEST_SUITE_P(
    AVX2, CDEFSpeedHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_avx2,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirDualSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_avx2,
                                                      cdef_find_dir_dual_c)));
#endif
713 
#if HAVE_NEON
// NEON speed suites: depth 8 for the 8-bit filters, depth 10 for the
// high-bitdepth filters.
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFSpeedTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon),
                       ::testing::ValuesIn(kCdefFilterFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(8)));
INSTANTIATE_TEST_SUITE_P(
    NEON, CDEFSpeedHighbdTest,
    ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon),
                       ::testing::ValuesIn(kCdefFilterHighbdFuncC),
                       ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
                                         BLOCK_8X8),
                       ::testing::Range(0, 16), ::testing::Values(10)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_neon,
                                                      cdef_find_dir_c)));
INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirDualSpeedTest,
                         ::testing::Values(make_tuple(cdef_find_dir_dual_neon,
                                                      cdef_find_dir_dual_c)));
#endif
736 
737 }  // namespace
738