• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <ostream>
13 #include <set>
14 #include <vector>
15 #include "config/av1_rtcd.h"
16 #include "config/aom_dsp_rtcd.h"
17 #include "test/acm_random.h"
18 #include "aom_ports/aom_timer.h"
19 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
20 
21 namespace {
22 
23 // TODO(any): Remove following INTERP_FILTERS_ALL define, so that 12-tap filter
24 // is tested once 12-tap filter SIMD is done.
25 #undef INTERP_FILTERS_ALL
26 #define INTERP_FILTERS_ALL 4
27 
28 // All single reference convolve tests are parameterized on block size,
29 // bit-depth, and function to test.
30 //
31 // Note that parameterizing on these variables (and not other parameters) is
32 // a conscious decision - Jenkins needs some degree of parallelization to run
33 // the tests within the time limit, but if the number of parameters increases
34 // too much, the gtest framework does not handle it well (increased overhead per
35 // test, huge amount of output to stdout, etc.).
36 //
37 // Also note that the test suites must be named with the architecture, e.g.,
38 // C, C_X, AVX2_X, ... The test suite that runs on Jenkins sometimes runs tests
39 // that cannot deal with intrinsics (e.g., the Valgrind tests on 32-bit x86
40 // binaries) and will disable tests using a filter like
41 // --gtest_filter=-:SSE4_1.*. If the test suites are not named this way, the
42 // testing infrastructure will not selectively filter them properly.
// Width/height pair describing one block shape under test. Instances are
// ordered by width first and height second, which lets them be used as keys
// in ordered containers such as std::set.
class BlockSize {
 public:
  BlockSize(int w, int h) : width_(w), height_(h) {}

  int Width() const { return width_; }
  int Height() const { return height_; }

  // Strict weak ordering: compare widths, break ties on height.
  bool operator<(const BlockSize &other) const {
    if (Width() == other.Width()) {
      return Height() < other.Height();
    }
    return Width() < other.Width();
  }

  // Two sizes are equal only when both dimensions match.
  bool operator==(const BlockSize &other) const {
    return Width() == other.Width() && Height() == other.Height();
  }

 private:
  int width_;
  int height_;
};
65 
66 // Block size / bit depth / test function used to parameterize the tests.
67 template <typename T>
68 class TestParam {
69  public:
TestParam(const BlockSize & block,int bd,T test_func)70   TestParam(const BlockSize &block, int bd, T test_func)
71       : block_(block), bd_(bd), test_func_(test_func) {}
72 
Block() const73   const BlockSize &Block() const { return block_; }
BitDepth() const74   int BitDepth() const { return bd_; }
TestFunction() const75   T TestFunction() const { return test_func_; }
76 
operator ==(const TestParam & other) const77   bool operator==(const TestParam &other) const {
78     return Block() == other.Block() && BitDepth() == other.BitDepth() &&
79            TestFunction() == other.TestFunction();
80   }
81 
82  private:
83   BlockSize block_;
84   int bd_;
85   T test_func_;
86 };
87 
88 template <typename T>
operator <<(std::ostream & os,const TestParam<T> & test_arg)89 std::ostream &operator<<(std::ostream &os, const TestParam<T> &test_arg) {
90   return os << "TestParam { width:" << test_arg.Block().Width()
91             << " height:" << test_arg.Block().Height()
92             << " bd:" << test_arg.BitDepth() << " }";
93 }
94 
95 // Generate the list of all block widths / heights that need to be tested,
96 // includes chroma and luma sizes, for the given bit-depths. The test
97 // function is the same for all generated parameters.
98 template <typename T>
GetTestParams(std::initializer_list<int> bit_depths,T test_func)99 std::vector<TestParam<T>> GetTestParams(std::initializer_list<int> bit_depths,
100                                         T test_func) {
101   std::set<BlockSize> sizes;
102   for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
103     const int w = block_size_wide[b];
104     const int h = block_size_high[b];
105     sizes.insert(BlockSize(w, h));
106     // Add in smaller chroma sizes as well.
107     if (w == 4 || h == 4) {
108       sizes.insert(BlockSize(w / 2, h / 2));
109     }
110   }
111   std::vector<TestParam<T>> result;
112   for (const BlockSize &block : sizes) {
113     for (int bd : bit_depths) {
114       result.push_back(TestParam<T>(block, bd, test_func));
115     }
116   }
117   return result;
118 }
119 
120 template <typename T>
GetLowbdTestParams(T test_func)121 std::vector<TestParam<T>> GetLowbdTestParams(T test_func) {
122   return GetTestParams({ 8 }, test_func);
123 }
124 
125 template <typename T>
BuildLowbdParams(T test_func)126 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdParams(
127     T test_func) {
128   return ::testing::ValuesIn(GetLowbdTestParams(test_func));
129 }
130 
131 // Test the test-parameters generators work as expected.
132 class AV1ConvolveParametersTest : public ::testing::Test {};
133 
TEST_F(AV1ConvolveParametersTest,GetLowbdTestParams)134 TEST_F(AV1ConvolveParametersTest, GetLowbdTestParams) {
135   auto v = GetLowbdTestParams(av1_convolve_x_sr_c);
136   ASSERT_EQ(27U, v.size());
137   for (const auto &p : v) {
138     ASSERT_EQ(8, p.BitDepth());
139     // Needed (instead of ASSERT_EQ(...) since gtest does not
140     // have built in printing for arbitrary functions, which
141     // causes a compilation error.
142     bool same_fn = av1_convolve_x_sr_c == p.TestFunction();
143     ASSERT_TRUE(same_fn);
144   }
145 }
146 
147 #if CONFIG_AV1_HIGHBITDEPTH
148 template <typename T>
GetHighbdTestParams(T test_func)149 std::vector<TestParam<T>> GetHighbdTestParams(T test_func) {
150   return GetTestParams({ 10, 12 }, test_func);
151 }
152 
153 template <typename T>
BuildHighbdParams(T test_func)154 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdParams(
155     T test_func) {
156   return ::testing::ValuesIn(GetHighbdTestParams(test_func));
157 }
158 
// Every high bit-depth parameter must carry bit depth 10 or 12 and the
// function that was passed in; 54 parameters are expected, split evenly
// between the two bit depths.
TEST_F(AV1ConvolveParametersTest, GetHighbdTestParams) {
  auto v = GetHighbdTestParams(av1_highbd_convolve_x_sr_c);
  ASSERT_EQ(54U, v.size());
  int num_10 = 0;
  int num_12 = 0;
  for (const auto &p : v) {
    ASSERT_TRUE(p.BitDepth() == 10 || p.BitDepth() == 12);
    // A bool is used because gtest cannot print arbitrary function pointers.
    bool same_fn = av1_highbd_convolve_x_sr_c == p.TestFunction();
    ASSERT_TRUE(same_fn);
    if (p.BitDepth() == 10) {
      ++num_10;
    } else {
      ++num_12;
    }
  }
  ASSERT_EQ(num_10, num_12);
}
176 #endif  // CONFIG_AV1_HIGHBITDEPTH
177 
178 // AV1ConvolveTest is the base class that all convolve tests should derive from.
179 // It provides storage/methods for generating randomized buffers for both
180 // low bit-depth and high bit-depth, and setup/teardown methods for clearing
181 // system state. Implementors can get the bit-depth / block-size /
182 // test function by calling GetParam().
183 template <typename T>
184 class AV1ConvolveTest : public ::testing::TestWithParam<TestParam<T>> {
185  public:
186   ~AV1ConvolveTest() override = default;
187 
SetUp()188   void SetUp() override {
189     rnd_.Reset(libaom_test::ACMRandom::DeterministicSeed());
190   }
191 
192   // Randomizes the 8-bit input buffer and returns a pointer to it. Note that
193   // the pointer is safe to use with an 8-tap filter. The stride can range
194   // from width to (width + kPadding). Also note that the pointer is to the
195   // same memory location.
196   static constexpr int kInputPadding = 12;
197 
198   // Get a pointer to a buffer with stride == width. Note that we must have
199   // the test param passed in explicitly -- the gtest framework does not
200   // support calling GetParam() within a templatized class.
201   // Note that FirstRandomInput8 always returns the same pointer -- if two
202   // inputs are needed, also use SecondRandomInput8.
FirstRandomInput8(const TestParam<T> & param)203   const uint8_t *FirstRandomInput8(const TestParam<T> &param) {
204     // Note we can't call GetParam() directly -- gtest does not support
205     // this for parameterized types.
206     return RandomInput8(input8_1_, param);
207   }
208 
SecondRandomInput8(const TestParam<T> & param)209   const uint8_t *SecondRandomInput8(const TestParam<T> &param) {
210     return RandomInput8(input8_2_, param);
211   }
212 
213   // Some of the intrinsics perform writes in 32 byte chunks. Moreover, some
214   // of the instrinsics assume that the stride is also a multiple of 32.
215   // To satisfy these constraints and also remain simple, output buffer strides
216   // are assumed MAX_SB_SIZE.
217   static constexpr int kOutputStride = MAX_SB_SIZE;
218 
219   // Check that two 8-bit output buffers are identical.
AssertOutputBufferEq(const uint8_t * p1,const uint8_t * p2,int width,int height)220   void AssertOutputBufferEq(const uint8_t *p1, const uint8_t *p2, int width,
221                             int height) {
222     ASSERT_TRUE(p1 != p2) << "Buffers must be at different memory locations";
223     for (int j = 0; j < height; ++j) {
224       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
225         p1 += kOutputStride;
226         p2 += kOutputStride;
227         continue;
228       }
229       for (int i = 0; i < width; ++i) {
230         ASSERT_EQ(p1[i], p2[i])
231             << width << "x" << height << " Pixel mismatch at (" << i << ", "
232             << j << ")";
233       }
234     }
235   }
236 
237   // Check that two 16-bit output buffers are identical.
AssertOutputBufferEq(const uint16_t * p1,const uint16_t * p2,int width,int height)238   void AssertOutputBufferEq(const uint16_t *p1, const uint16_t *p2, int width,
239                             int height) {
240     ASSERT_TRUE(p1 != p2) << "Buffers must be in different memory locations";
241     for (int j = 0; j < height; ++j) {
242       if (memcmp(p1, p2, sizeof(*p1) * width) == 0) {
243         p1 += kOutputStride;
244         p2 += kOutputStride;
245         continue;
246       }
247       for (int i = 0; i < width; ++i) {
248         ASSERT_EQ(p1[i], p2[i])
249             << width << "x" << height << " Pixel mismatch at (" << i << ", "
250             << j << ")";
251       }
252     }
253   }
254 
255 #if CONFIG_AV1_HIGHBITDEPTH
256   // Note that the randomized values are capped by bit-depth.
FirstRandomInput16(const TestParam<T> & param)257   const uint16_t *FirstRandomInput16(const TestParam<T> &param) {
258     return RandomInput16(input16_1_, param);
259   }
260 
SecondRandomInput16(const TestParam<T> & param)261   const uint16_t *SecondRandomInput16(const TestParam<T> &param) {
262     return RandomInput16(input16_2_, param);
263   }
264 #endif
265 
266  private:
RandomInput8(uint8_t * p,const TestParam<T> & param)267   const uint8_t *RandomInput8(uint8_t *p, const TestParam<T> &param) {
268     EXPECT_EQ(8, param.BitDepth());
269     EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
270     EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
271     const int padded_width = param.Block().Width() + kInputPadding;
272     const int padded_height = param.Block().Height() + kInputPadding;
273     Randomize(p, padded_width * padded_height);
274     return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
275   }
276 
Randomize(uint8_t * p,int size)277   void Randomize(uint8_t *p, int size) {
278     for (int i = 0; i < size; ++i) {
279       p[i] = rnd_.Rand8();
280     }
281   }
282 
283 #if CONFIG_AV1_HIGHBITDEPTH
RandomInput16(uint16_t * p,const TestParam<T> & param)284   const uint16_t *RandomInput16(uint16_t *p, const TestParam<T> &param) {
285     // Check that this is only called with high bit-depths.
286     EXPECT_TRUE(param.BitDepth() == 10 || param.BitDepth() == 12);
287     EXPECT_GE(MAX_SB_SIZE, param.Block().Width());
288     EXPECT_GE(MAX_SB_SIZE, param.Block().Height());
289     const int padded_width = param.Block().Width() + kInputPadding;
290     const int padded_height = param.Block().Height() + kInputPadding;
291     Randomize(p, padded_width * padded_height, param.BitDepth());
292     return p + (kInputPadding / 2) * padded_width + kInputPadding / 2;
293   }
294 
Randomize(uint16_t * p,int size,int bit_depth)295   void Randomize(uint16_t *p, int size, int bit_depth) {
296     for (int i = 0; i < size; ++i) {
297       p[i] = rnd_.Rand16() & ((1 << bit_depth) - 1);
298     }
299   }
300 #endif
301 
302   static constexpr int kInputStride = MAX_SB_SIZE + kInputPadding;
303 
304   libaom_test::ACMRandom rnd_;
305   // Statically allocate all the memory that is needed for the tests. Note
306   // that we cannot allocate output memory here. It must use DECLARE_ALIGNED,
307   // which is a C99 feature and interacts badly with C++ member variables.
308   uint8_t input8_1_[kInputStride * kInputStride];
309   uint8_t input8_2_[kInputStride * kInputStride];
310 #if CONFIG_AV1_HIGHBITDEPTH
311   uint16_t input16_1_[kInputStride * kInputStride];
312   uint16_t input16_2_[kInputStride * kInputStride];
313 #endif
314 };
315 
316 ////////////////////////////////////////////////////////
317 // Single reference convolve-x functions (low bit-depth)
318 ////////////////////////////////////////////////////////
319 typedef void (*convolve_x_func)(const uint8_t *src, int src_stride,
320                                 uint8_t *dst, int dst_stride, int w, int h,
321                                 const InterpFilterParams *filter_params_x,
322                                 const int subpel_x_qn,
323                                 ConvolveParams *conv_params);
324 
325 class AV1ConvolveXTest : public AV1ConvolveTest<convolve_x_func> {
326  public:
RunTest()327   void RunTest() {
328     for (int sub_x = 0; sub_x < 16; ++sub_x) {
329       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
330            ++filter) {
331         InterpFilter f = static_cast<InterpFilter>(filter);
332         TestConvolve(sub_x, f);
333       }
334     }
335   }
336 
337  public:
SpeedTest()338   void SpeedTest() {
339     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
340          ++filter) {
341       InterpFilter f = static_cast<InterpFilter>(filter);
342       TestConvolveSpeed(f, 10000);
343     }
344   }
345 
346  private:
TestConvolve(const int sub_x,const InterpFilter filter)347   void TestConvolve(const int sub_x, const InterpFilter filter) {
348     const int width = GetParam().Block().Width();
349     const int height = GetParam().Block().Height();
350 
351     const InterpFilterParams *filter_params_x =
352         av1_get_interp_filter_params_with_block_size(filter, width);
353     ConvolveParams conv_params1 =
354         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
355     const uint8_t *input = FirstRandomInput8(GetParam());
356     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
357     av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
358                         filter_params_x, sub_x, &conv_params1);
359 
360     ConvolveParams conv_params2 =
361         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
362     convolve_x_func test_func = GetParam().TestFunction();
363     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
364     test_func(input, width, test, kOutputStride, width, height, filter_params_x,
365               sub_x, &conv_params2);
366     AssertOutputBufferEq(reference, test, width, height);
367   }
368 
369  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)370   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
371     const int width = GetParam().Block().Width();
372     const int height = GetParam().Block().Height();
373 
374     const InterpFilterParams *filter_params_x =
375         av1_get_interp_filter_params_with_block_size(filter, width);
376     ConvolveParams conv_params1 =
377         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
378     const uint8_t *input = FirstRandomInput8(GetParam());
379     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
380 
381     aom_usec_timer timer;
382     aom_usec_timer_start(&timer);
383     for (int i = 0; i < num_iters; ++i) {
384       av1_convolve_x_sr_c(input, width, reference, kOutputStride, width, height,
385                           filter_params_x, 0, &conv_params1);
386     }
387     aom_usec_timer_mark(&timer);
388     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
389     ConvolveParams conv_params2 =
390         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
391     convolve_x_func test_func = GetParam().TestFunction();
392     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
393 
394     aom_usec_timer_start(&timer);
395     for (int i = 0; i < num_iters; ++i) {
396       test_func(input, width, test, kOutputStride, width, height,
397                 filter_params_x, 0, &conv_params2);
398     }
399     aom_usec_timer_mark(&timer);
400     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
401     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
402            time2, time1 / time2);
403   }
404 };
405 
TEST_P(AV1ConvolveXTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the timing run out of normal test executions.
TEST_P(AV1ConvolveXTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per available implementation; the suite prefix names the
// architecture so that it can be filtered on.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1ConvolveXTest,
                         BuildLowbdParams(av1_convolve_x_sr_neon_i8mm));
#endif
437 
438 ////////////////////////////////////////////////////////////////
439 // Single reference convolve-x IntraBC functions (low bit-depth)
440 ////////////////////////////////////////////////////////////////
441 
442 class AV1ConvolveXIntraBCTest : public AV1ConvolveTest<convolve_x_func> {
443  public:
RunTest()444   void RunTest() {
445     // IntraBC functions only operate for subpel_x_qn = 8.
446     constexpr int kSubX = 8;
447     const int width = GetParam().Block().Width();
448     const int height = GetParam().Block().Height();
449     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
450     const uint8_t *input = FirstRandomInput8(GetParam());
451 
452     ConvolveParams conv_params1 =
453         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
454     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
455     // Use a stride different from width to avoid potential storing errors that
456     // would go undetected. The input buffer is filled using a padding of 12, so
457     // the stride can be anywhere between width and width + 12.
458     av1_convolve_x_sr_intrabc_c(input, width + 2, reference, kOutputStride,
459                                 width, height, filter_params_x, kSubX,
460                                 &conv_params1);
461 
462     ConvolveParams conv_params2 =
463         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
464     convolve_x_func test_func = GetParam().TestFunction();
465     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
466     test_func(input, width + 2, test, kOutputStride, width, height,
467               filter_params_x, kSubX, &conv_params2);
468 
469     AssertOutputBufferEq(reference, test, width, height);
470   }
471 
SpeedTest()472   void SpeedTest() {
473     constexpr int kNumIters = 10000;
474     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
475     const int width = GetParam().Block().Width();
476     const int height = GetParam().Block().Height();
477     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
478     const uint8_t *input = FirstRandomInput8(GetParam());
479 
480     ConvolveParams conv_params1 =
481         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
482     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
483     aom_usec_timer timer;
484     aom_usec_timer_start(&timer);
485     for (int i = 0; i < kNumIters; ++i) {
486       av1_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride, width,
487                                   height, filter_params_x, 0, &conv_params1);
488     }
489     aom_usec_timer_mark(&timer);
490     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
491 
492     ConvolveParams conv_params2 =
493         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
494     convolve_x_func test_func = GetParam().TestFunction();
495     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
496     aom_usec_timer_start(&timer);
497     for (int i = 0; i < kNumIters; ++i) {
498       test_func(input, width, test, kOutputStride, width, height,
499                 filter_params_x, 0, &conv_params2);
500     }
501     aom_usec_timer_mark(&timer);
502     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
503 
504     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
505            time2, time1 / time2);
506   }
507 };
508 
TEST_P(AV1ConvolveXIntraBCTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the timing run out of normal test executions.
TEST_P(AV1ConvolveXIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per available implementation; the suite prefix names the
// architecture so that it can be filtered on.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXIntraBCTest,
                         BuildLowbdParams(av1_convolve_x_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXIntraBCTest,
                         BuildLowbdParams(av1_convolve_x_sr_intrabc_neon));
#endif
520 
521 #if CONFIG_AV1_HIGHBITDEPTH
522 /////////////////////////////////////////////////////////
523 // Single reference convolve-x functions (high bit-depth)
524 /////////////////////////////////////////////////////////
525 typedef void (*highbd_convolve_x_func)(
526     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
527     int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
528     ConvolveParams *conv_params, int bd);
529 
530 class AV1ConvolveXHighbdTest : public AV1ConvolveTest<highbd_convolve_x_func> {
531  public:
RunTest()532   void RunTest() {
533     for (int sub_x = 0; sub_x < 16; ++sub_x) {
534       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
535            ++filter) {
536         InterpFilter f = static_cast<InterpFilter>(filter);
537         TestConvolve(sub_x, f);
538       }
539     }
540   }
541 
542  public:
SpeedTest()543   void SpeedTest() {
544     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
545          ++filter) {
546       InterpFilter f = static_cast<InterpFilter>(filter);
547       TestConvolveSpeed(f, 10000);
548     }
549   }
550 
551  private:
TestConvolve(const int sub_x,const InterpFilter filter)552   void TestConvolve(const int sub_x, const InterpFilter filter) {
553     const int width = GetParam().Block().Width();
554     const int height = GetParam().Block().Height();
555     const int bit_depth = GetParam().BitDepth();
556     const InterpFilterParams *filter_params_x =
557         av1_get_interp_filter_params_with_block_size(filter, width);
558     ConvolveParams conv_params1 =
559         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
560     const uint16_t *input = FirstRandomInput16(GetParam());
561     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
562     av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
563                                height, filter_params_x, sub_x, &conv_params1,
564                                bit_depth);
565 
566     ConvolveParams conv_params2 =
567         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
568     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
569     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
570                               filter_params_x, sub_x, &conv_params2, bit_depth);
571     AssertOutputBufferEq(reference, test, width, height);
572   }
573 
574  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)575   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
576     const int width = GetParam().Block().Width();
577     const int height = GetParam().Block().Height();
578     const int bit_depth = GetParam().BitDepth();
579     const InterpFilterParams *filter_params_x =
580         av1_get_interp_filter_params_with_block_size(filter, width);
581     ConvolveParams conv_params1 =
582         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
583     const uint16_t *input = FirstRandomInput16(GetParam());
584     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
585 
586     aom_usec_timer timer;
587     aom_usec_timer_start(&timer);
588     for (int i = 0; i < num_iters; ++i) {
589       av1_highbd_convolve_x_sr_c(input, width, reference, kOutputStride, width,
590                                  height, filter_params_x, 0, &conv_params1,
591                                  bit_depth);
592     }
593     aom_usec_timer_mark(&timer);
594     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
595     ConvolveParams conv_params2 =
596         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
597     highbd_convolve_x_func test_func = GetParam().TestFunction();
598     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
599 
600     aom_usec_timer_start(&timer);
601     for (int i = 0; i < num_iters; ++i) {
602       test_func(input, width, test, kOutputStride, width, height,
603                 filter_params_x, 0, &conv_params2, bit_depth);
604     }
605     aom_usec_timer_mark(&timer);
606     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
607     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
608            time2, time1 / time2);
609   }
610 };
611 
TEST_P(AV1ConvolveXHighbdTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the timing run out of normal test executions.
TEST_P(AV1ConvolveXHighbdTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per available implementation; the suite prefix names the
// architecture so that it can be filtered on.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveXHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_sve2));
#endif
638 
639 /////////////////////////////////////////////////////////////////
640 // Single reference convolve-x IntraBC functions (high bit-depth)
641 /////////////////////////////////////////////////////////////////
642 
643 class AV1ConvolveXHighbdIntraBCTest
644     : public AV1ConvolveTest<highbd_convolve_x_func> {
645  public:
RunTest()646   void RunTest() {
647     // IntraBC functions only operate for subpel_x_qn = 8.
648     constexpr int kSubX = 8;
649     const int width = GetParam().Block().Width();
650     const int height = GetParam().Block().Height();
651     const int bit_depth = GetParam().BitDepth();
652     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
653     const uint16_t *input = FirstRandomInput16(GetParam());
654 
655     ConvolveParams conv_params1 =
656         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
657     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
658     // Use a stride different from width to avoid potential storing errors that
659     // would go undetected. The input buffer is filled using a padding of 12, so
660     // the stride can be anywhere between width and width + 12.
661     av1_highbd_convolve_x_sr_intrabc_c(
662         input, width + 2, reference, kOutputStride, width, height,
663         filter_params_x, kSubX, &conv_params1, bit_depth);
664 
665     ConvolveParams conv_params2 =
666         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
667     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
668     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
669                               height, filter_params_x, kSubX, &conv_params2,
670                               bit_depth);
671 
672     AssertOutputBufferEq(reference, test, width, height);
673   }
674 
SpeedTest()675   void SpeedTest() {
676     constexpr int kNumIters = 10000;
677     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
678     const int width = GetParam().Block().Width();
679     const int height = GetParam().Block().Height();
680     const int bit_depth = GetParam().BitDepth();
681     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
682     const uint16_t *input = FirstRandomInput16(GetParam());
683 
684     ConvolveParams conv_params1 =
685         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
686     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
687     aom_usec_timer timer;
688     aom_usec_timer_start(&timer);
689     for (int i = 0; i < kNumIters; ++i) {
690       av1_highbd_convolve_x_sr_intrabc_c(input, width, reference, kOutputStride,
691                                          width, height, filter_params_x, 0,
692                                          &conv_params1, bit_depth);
693     }
694     aom_usec_timer_mark(&timer);
695     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
696 
697     ConvolveParams conv_params2 =
698         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
699     highbd_convolve_x_func test_func = GetParam().TestFunction();
700     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
701     aom_usec_timer_start(&timer);
702     for (int i = 0; i < kNumIters; ++i) {
703       test_func(input, width, test, kOutputStride, width, height,
704                 filter_params_x, 0, &conv_params2, bit_depth);
705     }
706     aom_usec_timer_mark(&timer);
707     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
708 
709     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
710            time2, time1 / time2);
711   }
712 };
713 
TEST_P(AV1ConvolveXHighbdIntraBCTest, RunTest) { RunTest(); }

// The DISABLED_ prefix keeps the timing run out of normal test executions.
TEST_P(AV1ConvolveXHighbdIntraBCTest, DISABLED_SpeedTest) { SpeedTest(); }

// One instantiation per available implementation; the suite prefix names the
// architecture so that it can be filtered on.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_x_sr_intrabc_neon));
#endif
726 
727 #endif  // CONFIG_AV1_HIGHBITDEPTH
728 
729 ////////////////////////////////////////////////////////
730 // Single reference convolve-y functions (low bit-depth)
731 ////////////////////////////////////////////////////////
732 typedef void (*convolve_y_func)(const uint8_t *src, int src_stride,
733                                 uint8_t *dst, int dst_stride, int w, int h,
734                                 const InterpFilterParams *filter_params_y,
735                                 const int subpel_y_qn);
736 
737 class AV1ConvolveYTest : public AV1ConvolveTest<convolve_y_func> {
738  public:
RunTest()739   void RunTest() {
740     for (int sub_y = 0; sub_y < 16; ++sub_y) {
741       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
742            ++filter) {
743         InterpFilter f = static_cast<InterpFilter>(filter);
744         TestConvolve(sub_y, f);
745       }
746     }
747   }
748 
749  public:
SpeedTest()750   void SpeedTest() {
751     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
752          ++filter) {
753       InterpFilter f = static_cast<InterpFilter>(filter);
754       TestConvolveSpeed(f, 10000);
755     }
756   }
757 
758  private:
TestConvolve(const int sub_y,const InterpFilter filter)759   void TestConvolve(const int sub_y, const InterpFilter filter) {
760     const int width = GetParam().Block().Width();
761     const int height = GetParam().Block().Height();
762 
763     const InterpFilterParams *filter_params_y =
764         av1_get_interp_filter_params_with_block_size(filter, height);
765     const uint8_t *input = FirstRandomInput8(GetParam());
766     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
767     av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
768                         filter_params_y, sub_y);
769     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
770     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
771                               filter_params_y, sub_y);
772     AssertOutputBufferEq(reference, test, width, height);
773   }
774 
775  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)776   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
777     const int width = GetParam().Block().Width();
778     const int height = GetParam().Block().Height();
779 
780     const InterpFilterParams *filter_params_y =
781         av1_get_interp_filter_params_with_block_size(filter, height);
782     const uint8_t *input = FirstRandomInput8(GetParam());
783     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
784 
785     aom_usec_timer timer;
786     aom_usec_timer_start(&timer);
787     for (int i = 0; i < num_iters; ++i) {
788       av1_convolve_y_sr_c(input, width, reference, kOutputStride, width, height,
789                           filter_params_y, 0);
790     }
791     aom_usec_timer_mark(&timer);
792     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
793 
794     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
795 
796     aom_usec_timer_start(&timer);
797     for (int i = 0; i < num_iters; ++i) {
798       GetParam().TestFunction()(input, width, test, kOutputStride, width,
799                                 height, filter_params_y, 0);
800     }
801     aom_usec_timer_mark(&timer);
802     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
803     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
804            time2, time1 / time2);
805   }
806 };
807 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYTest,
                         BuildLowbdParams(av1_convolve_y_sr_neon));
#endif
829 
830 ////////////////////////////////////////////////////////////////
831 // Single reference convolve-y IntraBC functions (low bit-depth)
832 ////////////////////////////////////////////////////////////////
833 
834 class AV1ConvolveYIntraBCTest : public AV1ConvolveTest<convolve_y_func> {
835  public:
RunTest()836   void RunTest() {
837     // IntraBC functions only operate for subpel_y_qn = 8.
838     constexpr int kSubY = 8;
839     const int width = GetParam().Block().Width();
840     const int height = GetParam().Block().Height();
841     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
842     const uint8_t *input = FirstRandomInput8(GetParam());
843 
844     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
845     // Use a stride different from width to avoid potential storing errors that
846     // would go undetected. The input buffer is filled using a padding of 12, so
847     // the stride can be anywhere between width and width + 12.
848     av1_convolve_y_sr_intrabc_c(input, width + 2, reference, kOutputStride,
849                                 width, height, filter_params_y, kSubY);
850 
851     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
852     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
853                               height, filter_params_y, kSubY);
854 
855     AssertOutputBufferEq(reference, test, width, height);
856   }
857 
SpeedTest()858   void SpeedTest() {
859     constexpr int kNumIters = 10000;
860     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
861     const int width = GetParam().Block().Width();
862     const int height = GetParam().Block().Height();
863 
864     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
865     const uint8_t *input = FirstRandomInput8(GetParam());
866     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
867 
868     aom_usec_timer timer;
869     aom_usec_timer_start(&timer);
870     for (int i = 0; i < kNumIters; ++i) {
871       av1_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride, width,
872                                   height, filter_params_y, 0);
873     }
874     aom_usec_timer_mark(&timer);
875     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
876 
877     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
878     convolve_y_func test_func = GetParam().TestFunction();
879     aom_usec_timer_start(&timer);
880     for (int i = 0; i < kNumIters; ++i) {
881       test_func(input, width, test, kOutputStride, width, height,
882                 filter_params_y, 0);
883     }
884     aom_usec_timer_mark(&timer);
885     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
886 
887     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
888            time2, time1 / time2);
889   }
890 };
891 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYIntraBCTest,
                         BuildLowbdParams(av1_convolve_y_sr_intrabc_neon));
#endif
903 
904 #if CONFIG_AV1_HIGHBITDEPTH
905 /////////////////////////////////////////////////////////
906 // Single reference convolve-y functions (high bit-depth)
907 /////////////////////////////////////////////////////////
908 typedef void (*highbd_convolve_y_func)(
909     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
910     int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn,
911     int bd);
912 
913 class AV1ConvolveYHighbdTest : public AV1ConvolveTest<highbd_convolve_y_func> {
914  public:
RunTest()915   void RunTest() {
916     for (int sub_y = 0; sub_y < 16; ++sub_y) {
917       for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
918            ++filter) {
919         InterpFilter f = static_cast<InterpFilter>(filter);
920         TestConvolve(sub_y, f);
921       }
922     }
923   }
924 
925  public:
SpeedTest()926   void SpeedTest() {
927     for (int filter = EIGHTTAP_REGULAR; filter <= INTERP_FILTERS_ALL;
928          ++filter) {
929       InterpFilter f = static_cast<InterpFilter>(filter);
930       TestConvolveSpeed(f, 10000);
931     }
932   }
933 
934  private:
TestConvolve(const int sub_y,const InterpFilter filter)935   void TestConvolve(const int sub_y, const InterpFilter filter) {
936     const int width = GetParam().Block().Width();
937     const int height = GetParam().Block().Height();
938     const int bit_depth = GetParam().BitDepth();
939     const InterpFilterParams *filter_params_y =
940         av1_get_interp_filter_params_with_block_size(filter, height);
941     const uint16_t *input = FirstRandomInput16(GetParam());
942     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
943     av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
944                                height, filter_params_y, sub_y, bit_depth);
945     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
946     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
947                               filter_params_y, sub_y, bit_depth);
948     AssertOutputBufferEq(reference, test, width, height);
949   }
950 
951  private:
TestConvolveSpeed(const InterpFilter filter,const int num_iters)952   void TestConvolveSpeed(const InterpFilter filter, const int num_iters) {
953     const int width = GetParam().Block().Width();
954     const int height = GetParam().Block().Height();
955     const int bit_depth = GetParam().BitDepth();
956     const InterpFilterParams *filter_params_y =
957         av1_get_interp_filter_params_with_block_size(filter, width);
958     const uint16_t *input = FirstRandomInput16(GetParam());
959     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
960 
961     aom_usec_timer timer;
962     aom_usec_timer_start(&timer);
963     for (int i = 0; i < num_iters; ++i) {
964       av1_highbd_convolve_y_sr_c(input, width, reference, kOutputStride, width,
965                                  height, filter_params_y, 0, bit_depth);
966     }
967     aom_usec_timer_mark(&timer);
968     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
969     highbd_convolve_y_func test_func = GetParam().TestFunction();
970     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
971 
972     aom_usec_timer_start(&timer);
973     for (int i = 0; i < num_iters; ++i) {
974       test_func(input, width, test, kOutputStride, width, height,
975                 filter_params_y, 0, bit_depth);
976     }
977     aom_usec_timer_mark(&timer);
978     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
979     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
980            time2, time1 / time2);
981   }
982 };
983 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYHighbdTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(SSSE3, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(SVE2, AV1ConvolveYHighbdTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_sve2));
#endif
1010 
1011 /////////////////////////////////////////////////////////////////
1012 // Single reference convolve-y IntraBC functions (high bit-depth)
1013 /////////////////////////////////////////////////////////////////
1014 
1015 class AV1ConvolveYHighbdIntraBCTest
1016     : public AV1ConvolveTest<highbd_convolve_y_func> {
1017  public:
RunTest()1018   void RunTest() {
1019     // IntraBC functions only operate for subpel_y_qn = 8.
1020     constexpr int kSubY = 8;
1021     const int width = GetParam().Block().Width();
1022     const int height = GetParam().Block().Height();
1023     const int bit_depth = GetParam().BitDepth();
1024     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1025     const uint16_t *input = FirstRandomInput16(GetParam());
1026 
1027     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1028     // Use a stride different from width to avoid potential storing errors that
1029     // would go undetected. The input buffer is filled using a padding of 12, so
1030     // the stride can be anywhere between width and width + 12.
1031     av1_highbd_convolve_y_sr_intrabc_c(input, width + 2, reference,
1032                                        kOutputStride, width, height,
1033                                        filter_params_y, kSubY, bit_depth);
1034 
1035     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1036     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1037                               height, filter_params_y, kSubY, bit_depth);
1038 
1039     AssertOutputBufferEq(reference, test, width, height);
1040   }
1041 
SpeedTest()1042   void SpeedTest() {
1043     constexpr int kNumIters = 10000;
1044     const InterpFilter filter = static_cast<InterpFilter>(BILINEAR);
1045     const int width = GetParam().Block().Width();
1046     const int height = GetParam().Block().Height();
1047     const int bit_depth = GetParam().BitDepth();
1048     const InterpFilterParams *filter_params_y =
1049         av1_get_interp_filter_params_with_block_size(filter, width);
1050     const uint16_t *input = FirstRandomInput16(GetParam());
1051 
1052     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1053     aom_usec_timer timer;
1054     aom_usec_timer_start(&timer);
1055     for (int i = 0; i < kNumIters; ++i) {
1056       av1_highbd_convolve_y_sr_intrabc_c(input, width, reference, kOutputStride,
1057                                          width, height, filter_params_y, 0,
1058                                          bit_depth);
1059     }
1060     aom_usec_timer_mark(&timer);
1061     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1062 
1063     highbd_convolve_y_func test_func = GetParam().TestFunction();
1064     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1065     aom_usec_timer_start(&timer);
1066     for (int i = 0; i < kNumIters; ++i) {
1067       test_func(input, width, test, kOutputStride, width, height,
1068                 filter_params_y, 0, bit_depth);
1069     }
1070     aom_usec_timer_mark(&timer);
1071     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1072 
1073     printf("%d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", filter, width, height, time1,
1074            time2, time1 / time2);
1075   }
1076 };
1077 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveYHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1ConvolveYHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_y_sr_intrabc_neon));
#endif
1090 
1091 #endif  // CONFIG_AV1_HIGHBITDEPTH
1092 
1093 //////////////////////////////////////////////////////////////
1094 // Single reference convolve-copy functions (low bit-depth)
1095 //////////////////////////////////////////////////////////////
// Pointer to a low bit-depth block copy function.
using convolve_copy_func = void (*)(const uint8_t *src, ptrdiff_t src_stride,
                                    uint8_t *dst, ptrdiff_t dst_stride, int w,
                                    int h);
1099 
1100 class AV1ConvolveCopyTest : public AV1ConvolveTest<convolve_copy_func> {
1101  public:
RunTest()1102   void RunTest() {
1103     const int width = GetParam().Block().Width();
1104     const int height = GetParam().Block().Height();
1105     const uint8_t *input = FirstRandomInput8(GetParam());
1106     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1107     aom_convolve_copy_c(input, width, reference, kOutputStride, width, height);
1108     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1109     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1110     AssertOutputBufferEq(reference, test, width, height);
1111   }
1112 };
1113 
1114 // Note that even though these are AOM convolve functions, we are using the
1115 // newer AV1 test framework.
TEST_P(AV1ConvolveCopyTest,RunTest)1116 TEST_P(AV1ConvolveCopyTest, RunTest) { RunTest(); }
1117 
1118 INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyTest,
1119                          BuildLowbdParams(aom_convolve_copy_c));
1120 
1121 #if HAVE_SSE2
1122 INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyTest,
1123                          BuildLowbdParams(aom_convolve_copy_sse2));
1124 #endif
1125 
1126 #if HAVE_AVX2
1127 INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyTest,
1128                          BuildLowbdParams(aom_convolve_copy_avx2));
1129 #endif
1130 
1131 #if HAVE_NEON
1132 INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyTest,
1133                          BuildLowbdParams(aom_convolve_copy_neon));
1134 #endif
1135 
1136 #if CONFIG_AV1_HIGHBITDEPTH
1137 ///////////////////////////////////////////////////////////////
1138 // Single reference convolve-copy functions (high bit-depth)
1139 ///////////////////////////////////////////////////////////////
// Pointer to a high bit-depth block copy function.
using highbd_convolve_copy_func = void (*)(const uint16_t *src,
                                           ptrdiff_t src_stride, uint16_t *dst,
                                           ptrdiff_t dst_stride, int w, int h);
1143 
1144 class AV1ConvolveCopyHighbdTest
1145     : public AV1ConvolveTest<highbd_convolve_copy_func> {
1146  public:
RunTest()1147   void RunTest() {
1148     const BlockSize &block = GetParam().Block();
1149     const int width = block.Width();
1150     const int height = block.Height();
1151     const uint16_t *input = FirstRandomInput16(GetParam());
1152     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1153     aom_highbd_convolve_copy_c(input, width, reference, kOutputStride, width,
1154                                height);
1155     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1156     GetParam().TestFunction()(input, width, test, kOutputStride, width, height);
1157     AssertOutputBufferEq(reference, test, width, height);
1158   }
1159 };
1160 
// Correctness test, run for every instantiation below.
TEST_P(AV1ConvolveCopyHighbdTest, RunTest) {
  RunTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1ConvolveCopyHighbdTest,
                         BuildHighbdParams(aom_highbd_convolve_copy_neon));
#endif
1180 
1181 #endif  // CONFIG_AV1_HIGHBITDEPTH
1182 
1183 /////////////////////////////////////////////////////////
1184 // Single reference convolve-2D functions (low bit-depth)
1185 /////////////////////////////////////////////////////////
1186 typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
1187                                  uint8_t *dst, int dst_stride, int w, int h,
1188                                  const InterpFilterParams *filter_params_x,
1189                                  const InterpFilterParams *filter_params_y,
1190                                  const int subpel_x_qn, const int subpel_y_qn,
1191                                  ConvolveParams *conv_params);
1192 
1193 class AV1Convolve2DTest : public AV1ConvolveTest<convolve_2d_func> {
1194  public:
RunTest()1195   void RunTest() {
1196     for (int sub_x = 0; sub_x < 16; ++sub_x) {
1197       for (int sub_y = 0; sub_y < 16; ++sub_y) {
1198         for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1199           for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1200             if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1201                 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1202               continue;
1203             TestConvolve(static_cast<InterpFilter>(h_f),
1204                          static_cast<InterpFilter>(v_f), sub_x, sub_y);
1205           }
1206         }
1207       }
1208     }
1209   }
1210 
1211  public:
SpeedTest()1212   void SpeedTest() {
1213     for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1214       for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1215         if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1216             ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1217           continue;
1218         TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1219                           static_cast<InterpFilter>(v_f), 10000);
1220       }
1221     }
1222   }
1223 
1224  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1225   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1226                     const int sub_x, const int sub_y) {
1227     const int width = GetParam().Block().Width();
1228     const int height = GetParam().Block().Height();
1229     const InterpFilterParams *filter_params_x =
1230         av1_get_interp_filter_params_with_block_size(h_f, width);
1231     const InterpFilterParams *filter_params_y =
1232         av1_get_interp_filter_params_with_block_size(v_f, height);
1233     const uint8_t *input = FirstRandomInput8(GetParam());
1234     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1235     ConvolveParams conv_params1 =
1236         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1237     av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width, height,
1238                          filter_params_x, filter_params_y, sub_x, sub_y,
1239                          &conv_params1);
1240     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1241     ConvolveParams conv_params2 =
1242         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1243     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1244                               filter_params_x, filter_params_y, sub_x, sub_y,
1245                               &conv_params2);
1246     AssertOutputBufferEq(reference, test, width, height);
1247   }
1248 
1249  private:
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1250   void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1251                          int num_iters) {
1252     const int width = GetParam().Block().Width();
1253     const int height = GetParam().Block().Height();
1254     const InterpFilterParams *filter_params_x =
1255         av1_get_interp_filter_params_with_block_size(h_f, width);
1256     const InterpFilterParams *filter_params_y =
1257         av1_get_interp_filter_params_with_block_size(v_f, height);
1258     const uint8_t *input = FirstRandomInput8(GetParam());
1259     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1260     ConvolveParams conv_params1 =
1261         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1262     aom_usec_timer timer;
1263     aom_usec_timer_start(&timer);
1264     for (int i = 0; i < num_iters; ++i) {
1265       av1_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1266                            height, filter_params_x, filter_params_y, 0, 0,
1267                            &conv_params1);
1268     }
1269     aom_usec_timer_mark(&timer);
1270     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1271     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1272     ConvolveParams conv_params2 =
1273         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1274     aom_usec_timer_start(&timer);
1275     for (int i = 0; i < num_iters; ++i) {
1276       GetParam().TestFunction()(input, width, test, kOutputStride, width,
1277                                 height, filter_params_x, filter_params_y, 0, 0,
1278                                 &conv_params2);
1279     }
1280     aom_usec_timer_mark(&timer);
1281     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1282     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1283            time1, time2, time1 / time2);
1284   }
1285 };
1286 
// Correctness test, run for every instantiation below.
TEST_P(AV1Convolve2DTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1Convolve2DTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(NEON_I8MM, AV1Convolve2DTest,
                         BuildLowbdParams(av1_convolve_2d_sr_neon_i8mm));
#endif
1318 
1319 /////////////////////////////////////////////////////////////////
1320 // Single reference convolve-2D IntraBC functions (low bit-depth)
1321 /////////////////////////////////////////////////////////////////
1322 
1323 class AV1Convolve2DIntraBCTest : public AV1ConvolveTest<convolve_2d_func> {
1324  public:
RunTest()1325   void RunTest() {
1326     // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1327     constexpr int kSubX = 8;
1328     constexpr int kSubY = 8;
1329     const int width = GetParam().Block().Width();
1330     const int height = GetParam().Block().Height();
1331     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1332     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1333     const uint8_t *input = FirstRandomInput8(GetParam());
1334 
1335     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1336     ConvolveParams conv_params1 =
1337         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1338     // Use a stride different from width to avoid potential storing errors that
1339     // would go undetected. The input buffer is filled using a padding of 12, so
1340     // the stride can be anywhere between width and width + 12.
1341     av1_convolve_2d_sr_intrabc_c(input, width + 2, reference, kOutputStride,
1342                                  width, height, filter_params_x,
1343                                  filter_params_y, kSubX, kSubY, &conv_params1);
1344 
1345     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1346     ConvolveParams conv_params2 =
1347         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1348     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1349                               height, filter_params_x, filter_params_y, kSubX,
1350                               kSubY, &conv_params2);
1351 
1352     AssertOutputBufferEq(reference, test, width, height);
1353   }
1354 
SpeedTest()1355   void SpeedTest() {
1356     constexpr int kNumIters = 10000;
1357     const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1358     const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1359     const int width = GetParam().Block().Width();
1360     const int height = GetParam().Block().Height();
1361     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1362     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1363     const uint8_t *input = FirstRandomInput8(GetParam());
1364 
1365     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1366     ConvolveParams conv_params1 =
1367         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1368     aom_usec_timer timer;
1369     aom_usec_timer_start(&timer);
1370     for (int i = 0; i < kNumIters; ++i) {
1371       av1_convolve_2d_sr_intrabc_c(input, width, reference, kOutputStride,
1372                                    width, height, filter_params_x,
1373                                    filter_params_y, 8, 8, &conv_params1);
1374     }
1375     aom_usec_timer_mark(&timer);
1376     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1377 
1378     convolve_2d_func test_func = GetParam().TestFunction();
1379     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1380     ConvolveParams conv_params2 =
1381         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1382     aom_usec_timer_start(&timer);
1383     for (int i = 0; i < kNumIters; ++i) {
1384       test_func(input, width, test, kOutputStride, width, height,
1385                 filter_params_x, filter_params_y, 8, 8, &conv_params2);
1386     }
1387     aom_usec_timer_mark(&timer);
1388     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1389 
1390     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1391            time1, time2, time1 / time2);
1392   }
1393 };
1394 
// Correctness test, run for every instantiation below.
TEST_P(AV1Convolve2DIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps it out of default test runs.
TEST_P(AV1Convolve2DIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// Suites are named after the architecture so they can be filtered by name.
INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1Convolve2DIntraBCTest,
                         BuildLowbdParams(av1_convolve_2d_sr_intrabc_neon));
#endif
1406 
1407 #if CONFIG_AV1_HIGHBITDEPTH
1408 //////////////////////////////////////////////////////////
1409 // Single reference convolve-2d functions (high bit-depth)
1410 //////////////////////////////////////////////////////////
1411 
1412 typedef void (*highbd_convolve_2d_func)(
1413     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
1414     int h, const InterpFilterParams *filter_params_x,
1415     const InterpFilterParams *filter_params_y, const int subpel_x_qn,
1416     const int subpel_y_qn, ConvolveParams *conv_params, int bd);
1417 
1418 class AV1Convolve2DHighbdTest
1419     : public AV1ConvolveTest<highbd_convolve_2d_func> {
1420  public:
RunTest()1421   void RunTest() {
1422     for (int sub_x = 0; sub_x < 16; ++sub_x) {
1423       for (int sub_y = 0; sub_y < 16; ++sub_y) {
1424         for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1425           for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1426             if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1427                 ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1428               continue;
1429             TestConvolve(static_cast<InterpFilter>(h_f),
1430                          static_cast<InterpFilter>(v_f), sub_x, sub_y);
1431           }
1432         }
1433       }
1434     }
1435   }
1436 
1437  public:
SpeedTest()1438   void SpeedTest() {
1439     for (int h_f = EIGHTTAP_REGULAR; h_f <= INTERP_FILTERS_ALL; ++h_f) {
1440       for (int v_f = EIGHTTAP_REGULAR; v_f <= INTERP_FILTERS_ALL; ++v_f) {
1441         if (((h_f == MULTITAP_SHARP2) && (v_f < MULTITAP_SHARP2)) ||
1442             ((h_f < MULTITAP_SHARP2) && (v_f == MULTITAP_SHARP2)))
1443           continue;
1444         TestConvolveSpeed(static_cast<InterpFilter>(h_f),
1445                           static_cast<InterpFilter>(v_f), 10000);
1446       }
1447     }
1448   }
1449 
1450  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)1451   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
1452                     const int sub_x, const int sub_y) {
1453     const int width = GetParam().Block().Width();
1454     const int height = GetParam().Block().Height();
1455     const int bit_depth = GetParam().BitDepth();
1456     const InterpFilterParams *filter_params_x =
1457         av1_get_interp_filter_params_with_block_size(h_f, width);
1458     const InterpFilterParams *filter_params_y =
1459         av1_get_interp_filter_params_with_block_size(v_f, height);
1460     const uint16_t *input = FirstRandomInput16(GetParam());
1461     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1462     ConvolveParams conv_params1 =
1463         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1464     av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1465                                 height, filter_params_x, filter_params_y, sub_x,
1466                                 sub_y, &conv_params1, bit_depth);
1467     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1468     ConvolveParams conv_params2 =
1469         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1470     GetParam().TestFunction()(input, width, test, kOutputStride, width, height,
1471                               filter_params_x, filter_params_y, sub_x, sub_y,
1472                               &conv_params2, bit_depth);
1473     AssertOutputBufferEq(reference, test, width, height);
1474   }
1475 
TestConvolveSpeed(const InterpFilter h_f,const InterpFilter v_f,int num_iters)1476   void TestConvolveSpeed(const InterpFilter h_f, const InterpFilter v_f,
1477                          int num_iters) {
1478     const int width = GetParam().Block().Width();
1479     const int height = GetParam().Block().Height();
1480     const int bit_depth = GetParam().BitDepth();
1481     const InterpFilterParams *filter_params_x =
1482         av1_get_interp_filter_params_with_block_size(h_f, width);
1483     const InterpFilterParams *filter_params_y =
1484         av1_get_interp_filter_params_with_block_size(v_f, height);
1485     const uint16_t *input = FirstRandomInput16(GetParam());
1486     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1487     ConvolveParams conv_params1 =
1488         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1489     aom_usec_timer timer;
1490     aom_usec_timer_start(&timer);
1491     for (int i = 0; i < num_iters; ++i) {
1492       av1_highbd_convolve_2d_sr_c(input, width, reference, kOutputStride, width,
1493                                   height, filter_params_x, filter_params_y, 0,
1494                                   0, &conv_params1, bit_depth);
1495     }
1496     aom_usec_timer_mark(&timer);
1497     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1498     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1499     ConvolveParams conv_params2 =
1500         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1501     aom_usec_timer_start(&timer);
1502     for (int i = 0; i < num_iters; ++i) {
1503       GetParam().TestFunction()(input, width, test, kOutputStride, width,
1504                                 height, filter_params_x, filter_params_y, 0, 0,
1505                                 &conv_params2, bit_depth);
1506     }
1507     aom_usec_timer_mark(&timer);
1508     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1509     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1510            time1, time2, time1 / time2);
1511   }
1512 };
1513 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1Convolve2DHighbdTest, RunTest) {
  RunTest();
}

// Benchmark entry point. The DISABLED_ prefix keeps it out of default runs.
TEST_P(AV1Convolve2DHighbdTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per architecture; the suite prefix is the architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1Convolve2DHighbdTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_ssse3));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DHighbdTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_neon));
#endif

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1Convolve2DHighbdTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_sve2));
#endif
1540 
1541 //////////////////////////////////////////////////////////////////
1542 // Single reference convolve-2d IntraBC functions (high bit-depth)
1543 //////////////////////////////////////////////////////////////////
1544 
1545 class AV1Convolve2DHighbdIntraBCTest
1546     : public AV1ConvolveTest<highbd_convolve_2d_func> {
1547  public:
RunTest()1548   void RunTest() {
1549     // IntraBC functions only operate for subpel_x_qn = 8 and subpel_y_qn = 8.
1550     constexpr int kSubX = 8;
1551     constexpr int kSubY = 8;
1552     const int width = GetParam().Block().Width();
1553     const int height = GetParam().Block().Height();
1554     const int bit_depth = GetParam().BitDepth();
1555     const InterpFilterParams *filter_params_x = &av1_intrabc_filter_params;
1556     const InterpFilterParams *filter_params_y = &av1_intrabc_filter_params;
1557     const uint16_t *input = FirstRandomInput16(GetParam());
1558 
1559     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1560     ConvolveParams conv_params1 =
1561         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1562     // Use a stride different from width to avoid potential storing errors that
1563     // would go undetected. The input buffer is filled using a padding of 12, so
1564     // the stride can be anywhere between width and width + 12.
1565     av1_highbd_convolve_2d_sr_intrabc_c(input, width + 2, reference,
1566                                         kOutputStride, width, height,
1567                                         filter_params_x, filter_params_y, kSubX,
1568                                         kSubY, &conv_params1, bit_depth);
1569 
1570     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1571     ConvolveParams conv_params2 =
1572         get_conv_params_no_round(0, 0, nullptr, 0, 0, bit_depth);
1573     GetParam().TestFunction()(input, width + 2, test, kOutputStride, width,
1574                               height, filter_params_x, filter_params_y, kSubX,
1575                               kSubY, &conv_params2, bit_depth);
1576 
1577     AssertOutputBufferEq(reference, test, width, height);
1578   }
1579 
SpeedTest()1580   void SpeedTest() {
1581     constexpr int kNumIters = 10000;
1582     const InterpFilter h_f = static_cast<InterpFilter>(BILINEAR);
1583     const InterpFilter v_f = static_cast<InterpFilter>(BILINEAR);
1584     const int width = GetParam().Block().Width();
1585     const int height = GetParam().Block().Height();
1586     const int bit_depth = GetParam().BitDepth();
1587     const InterpFilterParams *filter_params_x =
1588         av1_get_interp_filter_params_with_block_size(h_f, width);
1589     const InterpFilterParams *filter_params_y =
1590         av1_get_interp_filter_params_with_block_size(v_f, height);
1591     const uint16_t *input = FirstRandomInput16(GetParam());
1592 
1593     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1594     ConvolveParams conv_params1 =
1595         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1596     aom_usec_timer timer;
1597     aom_usec_timer_start(&timer);
1598     for (int i = 0; i < kNumIters; ++i) {
1599       av1_highbd_convolve_2d_sr_intrabc_c(
1600           input, width, reference, kOutputStride, width, height,
1601           filter_params_x, filter_params_y, 0, 0, &conv_params1, bit_depth);
1602     }
1603     aom_usec_timer_mark(&timer);
1604     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1605 
1606     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1607     highbd_convolve_2d_func test_func = GetParam().TestFunction();
1608     ConvolveParams conv_params2 =
1609         get_conv_params_no_round(0, 0, nullptr, 0, 0, 8);
1610     aom_usec_timer_start(&timer);
1611     for (int i = 0; i < kNumIters; ++i) {
1612       test_func(input, width, test, kOutputStride, width, height,
1613                 filter_params_x, filter_params_y, 0, 0, &conv_params2,
1614                 bit_depth);
1615     }
1616     aom_usec_timer_mark(&timer);
1617     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
1618 
1619     printf("%d - %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n", h_f, v_f, width, height,
1620            time1, time2, time1 / time2);
1621   }
1622 };
1623 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1Convolve2DHighbdIntraBCTest, RunTest) {
  RunTest();
}

// Benchmark entry point. The DISABLED_ prefix keeps it out of default runs.
TEST_P(AV1Convolve2DHighbdIntraBCTest, DISABLED_SpeedTest) {
  SpeedTest();
}

INSTANTIATE_TEST_SUITE_P(C, AV1Convolve2DHighbdIntraBCTest,
                         BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_c));

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdIntraBCTest,
    BuildHighbdParams(av1_highbd_convolve_2d_sr_intrabc_neon));
#endif
1637 
1638 #endif  // CONFIG_AV1_HIGHBITDEPTH
1639 
1640 //////////////////////////
1641 // Compound Convolve Tests
1642 //////////////////////////
1643 
1644 // The compound functions do not work for chroma block sizes. Provide
1645 // a function to generate test parameters for just luma block sizes.
1646 template <typename T>
GetLumaTestParams(std::initializer_list<int> bit_depths,T test_func)1647 std::vector<TestParam<T>> GetLumaTestParams(
1648     std::initializer_list<int> bit_depths, T test_func) {
1649   std::set<BlockSize> sizes;
1650   for (int b = BLOCK_4X4; b < BLOCK_SIZES_ALL; ++b) {
1651     const int w = block_size_wide[b];
1652     const int h = block_size_high[b];
1653     sizes.insert(BlockSize(w, h));
1654   }
1655   std::vector<TestParam<T>> result;
1656   for (int bit_depth : bit_depths) {
1657     for (const auto &block : sizes) {
1658       result.push_back(TestParam<T>(block, bit_depth, test_func));
1659     }
1660   }
1661   return result;
1662 }
1663 
1664 template <typename T>
GetLowbdLumaTestParams(T test_func)1665 std::vector<TestParam<T>> GetLowbdLumaTestParams(T test_func) {
1666   return GetLumaTestParams({ 8 }, test_func);
1667 }
1668 
1669 template <typename T>
BuildLowbdLumaParams(T test_func)1670 ::testing::internal::ParamGenerator<TestParam<T>> BuildLowbdLumaParams(
1671     T test_func) {
1672   return ::testing::ValuesIn(GetLowbdLumaTestParams(test_func));
1673 }
1674 
// Sanity-checks the low bit-depth luma parameter list: 22 entries, all 8-bit,
// all carrying the function that was passed in.
TEST_F(AV1ConvolveParametersTest, GetLowbdLumaTestParams) {
  const auto params = GetLowbdLumaTestParams(av1_dist_wtd_convolve_x_c);
  ASSERT_EQ(22U, params.size());
  for (const auto &param : params) {
    ASSERT_EQ(8, param.BitDepth());
    const bool same_fn = (av1_dist_wtd_convolve_x_c == param.TestFunction());
    ASSERT_TRUE(same_fn);
  }
}
1684 
1685 #if CONFIG_AV1_HIGHBITDEPTH
1686 template <typename T>
GetHighbdLumaTestParams(T test_func)1687 std::vector<TestParam<T>> GetHighbdLumaTestParams(T test_func) {
1688   return GetLumaTestParams({ 10, 12 }, test_func);
1689 }
1690 
// Sanity-checks the high bit-depth luma parameter list: 44 entries, split
// evenly between 10-bit and 12-bit, all carrying the function passed in.
TEST_F(AV1ConvolveParametersTest, GetHighbdLumaTestParams) {
  const auto params = GetHighbdLumaTestParams(av1_highbd_dist_wtd_convolve_x_c);
  ASSERT_EQ(44U, params.size());
  int count_10bit = 0;
  int count_12bit = 0;
  for (const auto &param : params) {
    const int depth = param.BitDepth();
    ASSERT_TRUE(10 == depth || 12 == depth);
    const bool same_fn =
        (av1_highbd_dist_wtd_convolve_x_c == param.TestFunction());
    ASSERT_TRUE(same_fn);
    if (depth == 10) {
      ++count_10bit;
    } else {
      ++count_12bit;
    }
  }
  ASSERT_EQ(count_10bit, count_12bit);
}
1708 
1709 template <typename T>
BuildHighbdLumaParams(T test_func)1710 ::testing::internal::ParamGenerator<TestParam<T>> BuildHighbdLumaParams(
1711     T test_func) {
1712   return ::testing::ValuesIn(GetHighbdLumaTestParams(test_func));
1713 }
1714 
1715 #endif  // CONFIG_AV1_HIGHBITDEPTH
1716 
// Compound cases also need to test different frame offsets and weightings.
// This immutable value type bundles one such configuration: whether
// distance-weighted averaging is used, plus the forward and backward offsets.
class CompoundParam {
 public:
  CompoundParam(bool use_dist_wtd_comp_avg, int fwd_offset, int bck_offset)
      : use_dist_wtd_comp_avg_(use_dist_wtd_comp_avg),
        fwd_offset_(fwd_offset),
        bck_offset_(bck_offset) {}

  // Read-only accessors for the stored configuration.
  bool UseDistWtdCompAvg() const {
    return use_dist_wtd_comp_avg_;
  }

  int FwdOffset() const {
    return fwd_offset_;
  }

  int BckOffset() const {
    return bck_offset_;
  }

 private:
  bool use_dist_wtd_comp_avg_;
  int fwd_offset_;
  int bck_offset_;
};
1733 
GetCompoundParams()1734 std::vector<CompoundParam> GetCompoundParams() {
1735   std::vector<CompoundParam> result;
1736   result.push_back(CompoundParam(false, 0, 0));
1737   for (int k = 0; k < 2; ++k) {
1738     for (int l = 0; l < 4; ++l) {
1739       result.push_back(CompoundParam(true, quant_dist_lookup_table[l][k],
1740                                      quant_dist_lookup_table[l][1 - k]));
1741     }
1742   }
1743   return result;
1744 }
1745 
// Sanity-checks the compound configuration list: nine entries, of which only
// the first has distance weighting disabled.
TEST_F(AV1ConvolveParametersTest, GetCompoundParams) {
  const auto params = GetCompoundParams();
  ASSERT_EQ(9U, params.size());
  ASSERT_FALSE(params[0].UseDistWtdCompAvg());
  for (size_t idx = 1; idx < params.size(); ++idx) {
    const CompoundParam &param = params[idx];
    ASSERT_TRUE(param.UseDistWtdCompAvg());
  }
}
1754 
1755 ////////////////////////////////////////////////
1756 // Compound convolve-x functions (low bit-depth)
1757 ////////////////////////////////////////////////
1758 
GetConvolveParams(int do_average,CONV_BUF_TYPE * conv_buf,int width,int bit_depth,const CompoundParam & compound)1759 ConvolveParams GetConvolveParams(int do_average, CONV_BUF_TYPE *conv_buf,
1760                                  int width, int bit_depth,
1761                                  const CompoundParam &compound) {
1762   ConvolveParams conv_params =
1763       get_conv_params_no_round(do_average, 0, conv_buf, width, 1, bit_depth);
1764   conv_params.use_dist_wtd_comp_avg = compound.UseDistWtdCompAvg();
1765   conv_params.fwd_offset = compound.FwdOffset();
1766   conv_params.bck_offset = compound.BckOffset();
1767   return conv_params;
1768 }
1769 
1770 class AV1ConvolveXCompoundTest : public AV1ConvolveTest<convolve_x_func> {
1771  public:
RunTest()1772   void RunTest() {
1773     auto compound_params = GetCompoundParams();
1774     for (int sub_pix = 0; sub_pix < 16; ++sub_pix) {
1775       for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1776         for (const auto &c : compound_params) {
1777           TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1778         }
1779       }
1780     }
1781   }
1782 
1783  protected:
FilterParams(InterpFilter f,const BlockSize & block) const1784   virtual const InterpFilterParams *FilterParams(InterpFilter f,
1785                                                  const BlockSize &block) const {
1786     return av1_get_interp_filter_params_with_block_size(f, block.Width());
1787   }
1788 
ReferenceFunc() const1789   virtual convolve_x_func ReferenceFunc() const {
1790     return av1_dist_wtd_convolve_x_c;
1791   }
1792 
1793  private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1794   void TestConvolve(const int sub_pix, const InterpFilter filter,
1795                     const CompoundParam &compound) {
1796     const int width = GetParam().Block().Width();
1797     const int height = GetParam().Block().Height();
1798     const uint8_t *input1 = FirstRandomInput8(GetParam());
1799     const uint8_t *input2 = SecondRandomInput8(GetParam());
1800     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
1801     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1802     Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1803              compound, sub_pix, filter);
1804 
1805     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
1806     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1807     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1808              compound, sub_pix, filter);
1809 
1810     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1811     AssertOutputBufferEq(reference, test, width, height);
1812   }
1813 
1814  private:
Convolve(convolve_x_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1815   void Convolve(convolve_x_func test_func, const uint8_t *src1,
1816                 const uint8_t *src2, uint8_t *dst, CONV_BUF_TYPE *conv_buf,
1817                 const CompoundParam &compound, const int sub_pix,
1818                 const InterpFilter filter) {
1819     const int width = GetParam().Block().Width();
1820     const int height = GetParam().Block().Height();
1821     const InterpFilterParams *filter_params =
1822         FilterParams(filter, GetParam().Block());
1823 
1824     ConvolveParams conv_params =
1825         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
1826     test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1827               sub_pix, &conv_params);
1828 
1829     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
1830     test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1831               sub_pix, &conv_params);
1832   }
1833 };
1834 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1ConvolveXCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per architecture; the suite prefix is the architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon));
#endif

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_dotprod));
#endif

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1ConvolveXCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_x_neon_i8mm));
#endif
1866 
1867 #if CONFIG_AV1_HIGHBITDEPTH
1868 /////////////////////////////////////////////////
1869 // Compound convolve-x functions (high bit-depth)
1870 /////////////////////////////////////////////////
1871 class AV1ConvolveXHighbdCompoundTest
1872     : public AV1ConvolveTest<highbd_convolve_x_func> {
1873  public:
RunTest()1874   void RunTest() {
1875     auto compound_params = GetCompoundParams();
1876     for (int sub_pix = 0; sub_pix < 16; ++sub_pix) {
1877       for (int f = EIGHTTAP_REGULAR; f < INTERP_FILTERS_ALL; ++f) {
1878         for (const auto &c : compound_params) {
1879           TestConvolve(sub_pix, static_cast<InterpFilter>(f), c);
1880         }
1881       }
1882     }
1883   }
1884 
1885  protected:
FilterParams(InterpFilter f,const BlockSize & block) const1886   virtual const InterpFilterParams *FilterParams(InterpFilter f,
1887                                                  const BlockSize &block) const {
1888     return av1_get_interp_filter_params_with_block_size(f, block.Width());
1889   }
1890 
ReferenceFunc() const1891   virtual highbd_convolve_x_func ReferenceFunc() const {
1892     return av1_highbd_dist_wtd_convolve_x_c;
1893   }
1894 
1895  private:
TestConvolve(const int sub_pix,const InterpFilter filter,const CompoundParam & compound)1896   void TestConvolve(const int sub_pix, const InterpFilter filter,
1897                     const CompoundParam &compound) {
1898     const int width = GetParam().Block().Width();
1899     const int height = GetParam().Block().Height();
1900 
1901     const uint16_t *input1 = FirstRandomInput16(GetParam());
1902     const uint16_t *input2 = SecondRandomInput16(GetParam());
1903     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
1904     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
1905     Convolve(ReferenceFunc(), input1, input2, reference, reference_conv_buf,
1906              compound, sub_pix, filter);
1907 
1908     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
1909     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
1910     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
1911              compound, sub_pix, filter);
1912 
1913     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
1914     AssertOutputBufferEq(reference, test, width, height);
1915   }
1916 
Convolve(highbd_convolve_x_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,CONV_BUF_TYPE * conv_buf,const CompoundParam & compound,const int sub_pix,const InterpFilter filter)1917   void Convolve(highbd_convolve_x_func test_func, const uint16_t *src1,
1918                 const uint16_t *src2, uint16_t *dst, CONV_BUF_TYPE *conv_buf,
1919                 const CompoundParam &compound, const int sub_pix,
1920                 const InterpFilter filter) {
1921     const int width = GetParam().Block().Width();
1922     const int height = GetParam().Block().Height();
1923     const int bit_depth = GetParam().BitDepth();
1924     const InterpFilterParams *filter_params =
1925         FilterParams(filter, GetParam().Block());
1926     ConvolveParams conv_params =
1927         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
1928     test_func(src1, width, dst, kOutputStride, width, height, filter_params,
1929               sub_pix, &conv_params, bit_depth);
1930     conv_params =
1931         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
1932     test_func(src2, width, dst, kOutputStride, width, height, filter_params,
1933               sub_pix, &conv_params, bit_depth);
1934   }
1935 };
1936 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1ConvolveXHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per architecture; the suite prefix is the architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveXHighbdCompoundTest,
                         BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_neon));
#endif  // HAVE_NEON

#if HAVE_SVE2
INSTANTIATE_TEST_SUITE_P(
    SVE2, AV1ConvolveXHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_x_sve2));
#endif  // HAVE_SVE2
1966 
1967 #endif  // CONFIG_AV1_HIGHBITDEPTH
1968 
1969 ////////////////////////////////////////////////
1970 // Compound convolve-y functions (low bit-depth)
1971 ////////////////////////////////////////////////
1972 
1973 // Note that the X and Y convolve functions have the same type signature and
1974 // logic; they only differentiate the filter parameters and reference function.
1975 class AV1ConvolveYCompoundTest : public AV1ConvolveXCompoundTest {
1976  protected:
FilterParams(InterpFilter f,const BlockSize & block) const1977   const InterpFilterParams *FilterParams(
1978       InterpFilter f, const BlockSize &block) const override {
1979     return av1_get_interp_filter_params_with_block_size(f, block.Height());
1980   }
1981 
ReferenceFunc() const1982   convolve_x_func ReferenceFunc() const override {
1983     return av1_dist_wtd_convolve_y_c;
1984   }
1985 };
1986 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1ConvolveYCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per architecture; the suite prefix is the architecture.
INSTANTIATE_TEST_SUITE_P(
    C, AV1ConvolveYCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_y_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1ConvolveYCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_y_sse2));
#endif

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveYCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_y_avx2));
#endif

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_y_neon));
#endif
2006 
2007 #if CONFIG_AV1_HIGHBITDEPTH
2008 /////////////////////////////////////////////////
2009 // Compound convolve-y functions (high bit-depth)
2010 /////////////////////////////////////////////////
2011 
2012 // Again, the X and Y convolve functions have the same type signature and logic.
2013 class AV1ConvolveYHighbdCompoundTest : public AV1ConvolveXHighbdCompoundTest {
ReferenceFunc() const2014   highbd_convolve_x_func ReferenceFunc() const override {
2015     return av1_highbd_dist_wtd_convolve_y_c;
2016   }
FilterParams(InterpFilter f,const BlockSize & block) const2017   const InterpFilterParams *FilterParams(
2018       InterpFilter f, const BlockSize &block) const override {
2019     return av1_get_interp_filter_params_with_block_size(f, block.Height());
2020   }
2021 };
2022 
// Runs the fixture's RunTest() once per instantiated parameter.
TEST_P(AV1ConvolveYHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per architecture; the suite prefix is the architecture.
INSTANTIATE_TEST_SUITE_P(C, AV1ConvolveYHighbdCompoundTest,
                         BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1ConvolveYHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_y_neon));
#endif  // HAVE_NEON
2046 
2047 #endif  // CONFIG_AV1_HIGHBITDEPTH
2048 
2049 //////////////////////////////////////////////////////
2050 // Compound convolve-2d-copy functions (low bit-depth)
2051 //////////////////////////////////////////////////////
2052 typedef void (*compound_conv_2d_copy_func)(const uint8_t *src, int src_stride,
2053                                            uint8_t *dst, int dst_stride, int w,
2054                                            int h, ConvolveParams *conv_params);
2055 
2056 class AV1Convolve2DCopyCompoundTest
2057     : public AV1ConvolveTest<compound_conv_2d_copy_func> {
2058  public:
RunTest()2059   void RunTest() {
2060     auto compound_params = GetCompoundParams();
2061     for (const auto &compound : compound_params) {
2062       TestConvolve(compound);
2063     }
2064   }
SpeedTest()2065   void SpeedTest() {
2066     for (const auto &compound : GetCompoundParams()) {
2067       TestConvolveSpeed(compound, 100000);
2068     }
2069   }
2070 
2071  private:
TestConvolve(const CompoundParam & compound)2072   void TestConvolve(const CompoundParam &compound) {
2073     const BlockSize &block = GetParam().Block();
2074     const int width = block.Width();
2075     const int height = block.Height();
2076 
2077     const uint8_t *input1 = FirstRandomInput8(GetParam());
2078     const uint8_t *input2 = SecondRandomInput8(GetParam());
2079     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2080     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2081     Convolve(av1_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2082              reference_conv_buf, compound);
2083 
2084     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2085     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2086     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2087              compound);
2088 
2089     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2090     AssertOutputBufferEq(reference, test, width, height);
2091   }
2092 
TestConvolveSpeed(const CompoundParam & compound,const int num_iters)2093   void TestConvolveSpeed(const CompoundParam &compound, const int num_iters) {
2094     const int width = GetParam().Block().Width();
2095     const int height = GetParam().Block().Height();
2096 
2097     const uint8_t *src0 = FirstRandomInput8(GetParam());
2098     const uint8_t *src1 = SecondRandomInput8(GetParam());
2099     DECLARE_ALIGNED(32, uint8_t, dst[MAX_SB_SQUARE]);
2100     DECLARE_ALIGNED(32, CONV_BUF_TYPE, conv_buf[MAX_SB_SQUARE]);
2101 
2102     const auto test_func = GetParam().TestFunction();
2103 
2104     ConvolveParams conv_params_0 =
2105         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2106     ConvolveParams conv_params_1 =
2107         GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2108 
2109     aom_usec_timer timer;
2110     aom_usec_timer_start(&timer);
2111     for (int i = 0; i < num_iters; ++i) {
2112       av1_dist_wtd_convolve_2d_copy_c(src0, width, dst, kOutputStride, width,
2113                                       height, &conv_params_0);
2114       av1_dist_wtd_convolve_2d_copy_c(src1, width, dst, kOutputStride, width,
2115                                       height, &conv_params_1);
2116     }
2117     aom_usec_timer_mark(&timer);
2118     const double time1 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2119 
2120     aom_usec_timer_start(&timer);
2121     for (int i = 0; i < num_iters; ++i) {
2122       test_func(src0, width, dst, kOutputStride, width, height, &conv_params_0);
2123       test_func(src1, width, dst, kOutputStride, width, height, &conv_params_1);
2124     }
2125     aom_usec_timer_mark(&timer);
2126     const double time2 = static_cast<double>(aom_usec_timer_elapsed(&timer));
2127     printf("Dist Weighted: %d %3dx%-3d:%7.2f/%7.2fns (%3.2f)\n",
2128            compound.UseDistWtdCompAvg(), width, height, time1, time2,
2129            time1 / time2);
2130   }
2131 
Convolve(compound_conv_2d_copy_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2132   void Convolve(compound_conv_2d_copy_func test_func, const uint8_t *src1,
2133                 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2134                 const CompoundParam &compound) {
2135     const BlockSize &block = GetParam().Block();
2136     const int width = block.Width();
2137     const int height = block.Height();
2138     ConvolveParams conv_params =
2139         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2140     test_func(src1, width, dst, kOutputStride, width, height, &conv_params);
2141 
2142     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2143     test_func(src2, width, dst, kOutputStride, width, height, &conv_params);
2144   }
2145 };
2146 
// Correctness check against the C reference for each instantiated kernel.
TEST_P(AV1Convolve2DCopyCompoundTest, RunTest) {
  RunTest();
}

// Benchmark. The DISABLED_ prefix keeps GoogleTest from running it unless
// --gtest_also_run_disabled_tests is passed.
TEST_P(AV1Convolve2DCopyCompoundTest, DISABLED_SpeedTest) {
  SpeedTest();
}

// One instantiation per implementation. The first macro argument is the
// architecture prefix of the suite name.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_sse2));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_copy_neon));
#endif  // HAVE_NEON
2170 
2171 #if CONFIG_AV1_HIGHBITDEPTH
2172 ///////////////////////////////////////////////////////
2173 // Compound convolve-2d-copy functions (high bit-depth)
2174 ///////////////////////////////////////////////////////
2175 typedef void (*highbd_compound_conv_2d_copy_func)(const uint16_t *src,
2176                                                   int src_stride, uint16_t *dst,
2177                                                   int dst_stride, int w, int h,
2178                                                   ConvolveParams *conv_params,
2179                                                   int bd);
2180 
2181 class AV1Convolve2DCopyHighbdCompoundTest
2182     : public AV1ConvolveTest<highbd_compound_conv_2d_copy_func> {
2183  public:
RunTest()2184   void RunTest() {
2185     auto compound_params = GetCompoundParams();
2186     for (const auto &compound : compound_params) {
2187       TestConvolve(compound);
2188     }
2189   }
2190 
2191  private:
TestConvolve(const CompoundParam & compound)2192   void TestConvolve(const CompoundParam &compound) {
2193     const BlockSize &block = GetParam().Block();
2194     const int width = block.Width();
2195     const int height = block.Height();
2196 
2197     const uint16_t *input1 = FirstRandomInput16(GetParam());
2198     const uint16_t *input2 = SecondRandomInput16(GetParam());
2199     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2200     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2201     Convolve(av1_highbd_dist_wtd_convolve_2d_copy_c, input1, input2, reference,
2202              reference_conv_buf, compound);
2203 
2204     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2205     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2206     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2207              compound);
2208 
2209     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2210     AssertOutputBufferEq(reference, test, width, height);
2211   }
2212 
Convolve(highbd_compound_conv_2d_copy_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound)2213   void Convolve(highbd_compound_conv_2d_copy_func test_func,
2214                 const uint16_t *src1, const uint16_t *src2, uint16_t *dst,
2215                 uint16_t *conv_buf, const CompoundParam &compound) {
2216     const BlockSize &block = GetParam().Block();
2217     const int width = block.Width();
2218     const int height = block.Height();
2219     const int bit_depth = GetParam().BitDepth();
2220 
2221     ConvolveParams conv_params =
2222         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2223     test_func(src1, width, dst, kOutputStride, width, height, &conv_params,
2224               bit_depth);
2225 
2226     conv_params =
2227         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2228     test_func(src2, width, dst, kOutputStride, width, height, &conv_params,
2229               bit_depth);
2230   }
2231 };
2232 
// Correctness check against the C reference for each instantiated kernel.
TEST_P(AV1Convolve2DCopyHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation. The first macro argument is the
// architecture prefix of the suite name.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCopyHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_copy_neon));
#endif  // HAVE_NEON
2256 
2257 #endif  // CONFIG_AV1_HIGHBITDEPTH
2258 
2259 /////////////////////////////////////////////////
2260 // Compound convolve-2d functions (low bit-depth)
2261 /////////////////////////////////////////////////
2262 
2263 class AV1Convolve2DCompoundTest : public AV1ConvolveTest<convolve_2d_func> {
2264  public:
RunTest()2265   void RunTest() {
2266     auto compound_params = GetCompoundParams();
2267     for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2268       for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2269         for (int sub_x = 0; sub_x < 16; ++sub_x) {
2270           for (int sub_y = 0; sub_y < 16; ++sub_y) {
2271             for (const auto &compound : compound_params) {
2272               TestConvolve(static_cast<InterpFilter>(h_f),
2273                            static_cast<InterpFilter>(v_f), sub_x, sub_y,
2274                            compound);
2275             }
2276           }
2277         }
2278       }
2279     }
2280   }
2281 
2282  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2283   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2284                     const int sub_x, const int sub_y,
2285                     const CompoundParam &compound) {
2286     const BlockSize &block = GetParam().Block();
2287     const int width = block.Width();
2288     const int height = block.Height();
2289 
2290     const uint8_t *input1 = FirstRandomInput8(GetParam());
2291     const uint8_t *input2 = SecondRandomInput8(GetParam());
2292     DECLARE_ALIGNED(32, uint8_t, reference[MAX_SB_SQUARE]);
2293     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2294     Convolve(av1_dist_wtd_convolve_2d_c, input1, input2, reference,
2295              reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2296 
2297     DECLARE_ALIGNED(32, uint8_t, test[MAX_SB_SQUARE]);
2298     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2299     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2300              compound, h_f, v_f, sub_x, sub_y);
2301 
2302     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2303     AssertOutputBufferEq(reference, test, width, height);
2304   }
2305 
2306  private:
Convolve(convolve_2d_func test_func,const uint8_t * src1,const uint8_t * src2,uint8_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2307   void Convolve(convolve_2d_func test_func, const uint8_t *src1,
2308                 const uint8_t *src2, uint8_t *dst, uint16_t *conv_buf,
2309                 const CompoundParam &compound, const InterpFilter h_f,
2310                 const InterpFilter v_f, const int sub_x, const int sub_y) {
2311     const BlockSize &block = GetParam().Block();
2312     const int width = block.Width();
2313     const int height = block.Height();
2314 
2315     const InterpFilterParams *filter_params_x =
2316         av1_get_interp_filter_params_with_block_size(h_f, width);
2317     const InterpFilterParams *filter_params_y =
2318         av1_get_interp_filter_params_with_block_size(v_f, height);
2319     ConvolveParams conv_params =
2320         GetConvolveParams(0, conv_buf, kOutputStride, 8, compound);
2321 
2322     test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2323               filter_params_y, sub_x, sub_y, &conv_params);
2324 
2325     conv_params = GetConvolveParams(1, conv_buf, kOutputStride, 8, compound);
2326     test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2327               filter_params_y, sub_x, sub_y, &conv_params);
2328   }
2329 };
2330 
// Correctness check against the C reference for each instantiated kernel.
TEST_P(AV1Convolve2DCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation. The first macro argument is the
// architecture prefix of the suite name.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_c));

#if HAVE_SSSE3
INSTANTIATE_TEST_SUITE_P(
    SSSE3, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_ssse3));
#endif  // HAVE_SSSE3

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_dotprod));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_NEON_I8MM
INSTANTIATE_TEST_SUITE_P(
    NEON_I8MM, AV1Convolve2DCompoundTest,
    BuildLowbdLumaParams(av1_dist_wtd_convolve_2d_neon_i8mm));
#endif  // HAVE_NEON_I8MM
2362 
2363 #if CONFIG_AV1_HIGHBITDEPTH
2364 //////////////////////////////////////////////////
2365 // Compound convolve-2d functions (high bit-depth)
2366 //////////////////////////////////////////////////
2367 
2368 class AV1Convolve2DHighbdCompoundTest
2369     : public AV1ConvolveTest<highbd_convolve_2d_func> {
2370  public:
RunTest()2371   void RunTest() {
2372     auto compound_params = GetCompoundParams();
2373     for (int h_f = EIGHTTAP_REGULAR; h_f < INTERP_FILTERS_ALL; ++h_f) {
2374       for (int v_f = EIGHTTAP_REGULAR; v_f < INTERP_FILTERS_ALL; ++v_f) {
2375         for (int sub_x = 0; sub_x < 16; ++sub_x) {
2376           for (int sub_y = 0; sub_y < 16; ++sub_y) {
2377             for (const auto &compound : compound_params) {
2378               TestConvolve(static_cast<InterpFilter>(h_f),
2379                            static_cast<InterpFilter>(v_f), sub_x, sub_y,
2380                            compound);
2381             }
2382           }
2383         }
2384       }
2385     }
2386   }
2387 
2388  private:
TestConvolve(const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y,const CompoundParam & compound)2389   void TestConvolve(const InterpFilter h_f, const InterpFilter v_f,
2390                     const int sub_x, const int sub_y,
2391                     const CompoundParam &compound) {
2392     const BlockSize &block = GetParam().Block();
2393     const int width = block.Width();
2394     const int height = block.Height();
2395     const uint16_t *input1 = FirstRandomInput16(GetParam());
2396     const uint16_t *input2 = SecondRandomInput16(GetParam());
2397     DECLARE_ALIGNED(32, uint16_t, reference[MAX_SB_SQUARE]);
2398     DECLARE_ALIGNED(32, CONV_BUF_TYPE, reference_conv_buf[MAX_SB_SQUARE]);
2399     Convolve(av1_highbd_dist_wtd_convolve_2d_c, input1, input2, reference,
2400              reference_conv_buf, compound, h_f, v_f, sub_x, sub_y);
2401 
2402     DECLARE_ALIGNED(32, uint16_t, test[MAX_SB_SQUARE]);
2403     DECLARE_ALIGNED(32, CONV_BUF_TYPE, test_conv_buf[MAX_SB_SQUARE]);
2404     Convolve(GetParam().TestFunction(), input1, input2, test, test_conv_buf,
2405              compound, h_f, v_f, sub_x, sub_y);
2406 
2407     AssertOutputBufferEq(reference_conv_buf, test_conv_buf, width, height);
2408     AssertOutputBufferEq(reference, test, width, height);
2409   }
2410 
2411  private:
Convolve(highbd_convolve_2d_func test_func,const uint16_t * src1,const uint16_t * src2,uint16_t * dst,uint16_t * conv_buf,const CompoundParam & compound,const InterpFilter h_f,const InterpFilter v_f,const int sub_x,const int sub_y)2412   void Convolve(highbd_convolve_2d_func test_func, const uint16_t *src1,
2413                 const uint16_t *src2, uint16_t *dst, uint16_t *conv_buf,
2414                 const CompoundParam &compound, const InterpFilter h_f,
2415                 const InterpFilter v_f, const int sub_x, const int sub_y) {
2416     const BlockSize &block = GetParam().Block();
2417     const int width = block.Width();
2418     const int height = block.Height();
2419 
2420     const InterpFilterParams *filter_params_x =
2421         av1_get_interp_filter_params_with_block_size(h_f, width);
2422     const InterpFilterParams *filter_params_y =
2423         av1_get_interp_filter_params_with_block_size(v_f, height);
2424     const int bit_depth = GetParam().BitDepth();
2425     ConvolveParams conv_params =
2426         GetConvolveParams(0, conv_buf, kOutputStride, bit_depth, compound);
2427     test_func(src1, width, dst, kOutputStride, width, height, filter_params_x,
2428               filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2429 
2430     conv_params =
2431         GetConvolveParams(1, conv_buf, kOutputStride, bit_depth, compound);
2432     test_func(src2, width, dst, kOutputStride, width, height, filter_params_x,
2433               filter_params_y, sub_x, sub_y, &conv_params, bit_depth);
2434   }
2435 };
2436 
// Correctness check against the C reference for each instantiated kernel.
TEST_P(AV1Convolve2DHighbdCompoundTest, RunTest) {
  RunTest();
}

// One instantiation per implementation. The first macro argument is the
// architecture prefix of the suite name.
INSTANTIATE_TEST_SUITE_P(
    C, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_c));

#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, AV1Convolve2DHighbdCompoundTest,
    BuildHighbdLumaParams(av1_highbd_dist_wtd_convolve_2d_neon));
#endif  // HAVE_NEON
2460 
2461 #endif  // CONFIG_AV1_HIGHBITDEPTH
2462 
2463 }  // namespace
2464