• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <cstdlib>
13 #include <new>
14 #include <ostream>
15 #include <tuple>
16 
17 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
18 
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21 
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "aom/aom_codec.h"
25 #include "aom/aom_integer.h"
26 #include "aom_mem/aom_mem.h"
27 #include "aom_ports/aom_timer.h"
28 #include "aom_ports/mem.h"
29 #include "av1/common/cdef_block.h"
30 
31 namespace {
32 
// Function-pointer signatures of the kernels exercised by this file.

// Takes an 8-bit dst plane and a 16-bit src plane, each with its own stride,
// plus the block dimensions; yields a 64-bit result.
using MseWxH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                     int sstride, int w, int h);
// As above, but the 16-bit src plane has no stride argument.
using Mse16xH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                      int w, int h);
// Two strided 8-bit planes in, one value returned and one written via |sse|.
using VarianceMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
// Two strided 8-bit planes in; all results are written through the output
// pointers.
using GetSseSum8x8QuadFunc = void (*)(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      uint32_t *sse8x8, int *sum8x8,
                                      unsigned int *tot_sse, int *tot_sum,
                                      uint32_t *var8x8);
// Same shape as the quad variant, without a per-block sum output.
using GetSseSum16x16DualFunc = void (*)(const uint8_t *a, int a_stride,
                                        const uint8_t *b, int b_stride,
                                        uint32_t *sse16x16,
                                        unsigned int *tot_sse, int *tot_sum,
                                        uint32_t *var16x16);
// Variance-style signature extended with x/y sub-pixel offsets for |a|.
using SubpixVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                          int xoffset, int yoffset,
                                          const uint8_t *b, int b_stride,
                                          unsigned int *sse);
// Sub-pixel signature that additionally receives a second predictor.
using SubpixAvgVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                             int xoffset, int yoffset,
                                             const uint8_t *b, int b_stride,
                                             uint32_t *sse,
                                             const uint8_t *second_pred);
// Takes a single block of 16-bit samples.
using SumOfSquaresFunction = unsigned int (*)(const int16_t *src);
60 typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)(
61     const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
62     int b_stride, uint32_t *sse, const uint8_t *second_pred,
63     const DIST_WTD_COMP_PARAMS *jcp_param);
64 
65 #if !CONFIG_REALTIME_ONLY
// Sub-pixel signature taking a predictor plane plus 32-bit |wsrc| and |mask|
// arrays.
using ObmcSubpelVarFunc = uint32_t (*)(const uint8_t *pre, int pre_stride,
                                       int xoffset, int yoffset,
                                       const int32_t *wsrc,
                                       const int32_t *mask, unsigned int *sse);
70 #endif
71 
72 using libaom_test::ACMRandom;
73 
74 // Truncate high bit depth results by downshifting (with rounding) by:
75 // 2 * (bit_depth - 8) for sse
76 // (bit_depth - 8) for se
RoundHighBitDepth(int bit_depth,int64_t * se,uint64_t * sse)77 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
78   switch (bit_depth) {
79     case AOM_BITS_12:
80       *sse = (*sse + 128) >> 8;
81       *se = (*se + 8) >> 4;
82       break;
83     case AOM_BITS_10:
84       *sse = (*sse + 8) >> 4;
85       *se = (*se + 2) >> 2;
86       break;
87     case AOM_BITS_8:
88     default: break;
89   }
90 }
91 
// Reference sum of squares over the first 256 samples of |src|.
static unsigned int mb_ss_ref(const int16_t *src) {
  unsigned int res = 0;
  const int16_t *const end = src + 256;
  for (const int16_t *p = src; p != end; ++p) {
    const int sample = *p;
    res += sample * sample;
  }
  return res;
}
99 
100 /* Note:
101  *  Our codebase calculates the "diff" value in the variance algorithm by
102  *  (src - ref).
103  */
variance_ref(const uint8_t * src,const uint8_t * ref,int l2w,int l2h,int src_stride,int ref_stride,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)104 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
105                              int l2h, int src_stride, int ref_stride,
106                              uint32_t *sse_ptr, bool use_high_bit_depth_,
107                              aom_bit_depth_t bit_depth) {
108   int64_t se = 0;
109   uint64_t sse = 0;
110   const int w = 1 << l2w;
111   const int h = 1 << l2h;
112   for (int y = 0; y < h; y++) {
113     for (int x = 0; x < w; x++) {
114       int diff;
115       if (!use_high_bit_depth_) {
116         diff = src[y * src_stride + x] - ref[y * ref_stride + x];
117         se += diff;
118         sse += diff * diff;
119       } else {
120         diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
121                CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
122         se += diff;
123         sse += diff * diff;
124       }
125     }
126   }
127   RoundHighBitDepth(bit_depth, &se, &sse);
128   *sse_ptr = static_cast<uint32_t>(sse);
129   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
130 }
131 
132 /* The subpel reference functions differ from the codec version in one aspect:
133  * they calculate the bilinear factors directly instead of using a lookup table
134  * and therefore upshift xoff and yoff by 1. Only every other calculated value
135  * is used so the codec version shrinks the table to save space.
136  */
subpel_variance_ref(const uint8_t * ref,const uint8_t * src,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)137 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
138                                     int l2w, int l2h, int xoff, int yoff,
139                                     uint32_t *sse_ptr, bool use_high_bit_depth_,
140                                     aom_bit_depth_t bit_depth) {
141   int64_t se = 0;
142   uint64_t sse = 0;
143   const int w = 1 << l2w;
144   const int h = 1 << l2h;
145 
146   xoff <<= 1;
147   yoff <<= 1;
148 
149   for (int y = 0; y < h; y++) {
150     for (int x = 0; x < w; x++) {
151       // Bilinear interpolation at a 16th pel step.
152       if (!use_high_bit_depth_) {
153         const int a1 = ref[(w + 1) * (y + 0) + x + 0];
154         const int a2 = ref[(w + 1) * (y + 0) + x + 1];
155         const int b1 = ref[(w + 1) * (y + 1) + x + 0];
156         const int b2 = ref[(w + 1) * (y + 1) + x + 1];
157         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
158         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
159         const int r = a + (((b - a) * yoff + 8) >> 4);
160         const int diff = r - src[w * y + x];
161         se += diff;
162         sse += diff * diff;
163       } else {
164         uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
165         uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
166         const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
167         const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
168         const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
169         const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
170         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
171         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
172         const int r = a + (((b - a) * yoff + 8) >> 4);
173         const int diff = r - src16[w * y + x];
174         se += diff;
175         sse += diff * diff;
176       }
177     }
178   }
179   RoundHighBitDepth(bit_depth, &se, &sse);
180   *sse_ptr = static_cast<uint32_t>(sse);
181   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
182 }
183 
subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth)184 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
185                                         const uint8_t *second_pred, int l2w,
186                                         int l2h, int xoff, int yoff,
187                                         uint32_t *sse_ptr,
188                                         bool use_high_bit_depth,
189                                         aom_bit_depth_t bit_depth) {
190   int64_t se = 0;
191   uint64_t sse = 0;
192   const int w = 1 << l2w;
193   const int h = 1 << l2h;
194 
195   xoff <<= 1;
196   yoff <<= 1;
197 
198   for (int y = 0; y < h; y++) {
199     for (int x = 0; x < w; x++) {
200       // bilinear interpolation at a 16th pel step
201       if (!use_high_bit_depth) {
202         const int a1 = ref[(w + 1) * (y + 0) + x + 0];
203         const int a2 = ref[(w + 1) * (y + 0) + x + 1];
204         const int b1 = ref[(w + 1) * (y + 1) + x + 0];
205         const int b2 = ref[(w + 1) * (y + 1) + x + 1];
206         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
207         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
208         const int r = a + (((b - a) * yoff + 8) >> 4);
209         const int diff =
210             ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
211         se += diff;
212         sse += diff * diff;
213       } else {
214         const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
215         const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
216         const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
217         const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
218         const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
219         const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
220         const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
221         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
222         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
223         const int r = a + (((b - a) * yoff + 8) >> 4);
224         const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
225         se += diff;
226         sse += diff * diff;
227       }
228     }
229   }
230   RoundHighBitDepth(bit_depth, &se, &sse);
231   *sse_ptr = static_cast<uint32_t>(sse);
232   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
233 }
234 
dist_wtd_subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth,DIST_WTD_COMP_PARAMS * jcp_param)235 static uint32_t dist_wtd_subpel_avg_variance_ref(
236     const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
237     int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
238     aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
239   int64_t se = 0;
240   uint64_t sse = 0;
241   const int w = 1 << l2w;
242   const int h = 1 << l2h;
243 
244   xoff <<= 1;
245   yoff <<= 1;
246 
247   for (int y = 0; y < h; y++) {
248     for (int x = 0; x < w; x++) {
249       // bilinear interpolation at a 16th pel step
250       if (!use_high_bit_depth) {
251         const int a1 = ref[(w + 0) * (y + 0) + x + 0];
252         const int a2 = ref[(w + 0) * (y + 0) + x + 1];
253         const int b1 = ref[(w + 0) * (y + 1) + x + 0];
254         const int b2 = ref[(w + 0) * (y + 1) + x + 1];
255         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
256         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
257         const int r = a + (((b - a) * yoff + 8) >> 4);
258         const int avg = ROUND_POWER_OF_TWO(
259             r * jcp_param->fwd_offset +
260                 second_pred[w * y + x] * jcp_param->bck_offset,
261             DIST_PRECISION_BITS);
262         const int diff = avg - src[w * y + x];
263 
264         se += diff;
265         sse += diff * diff;
266       } else {
267         const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
268         const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
269         const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
270         const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
271         const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
272         const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
273         const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
274         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
275         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
276         const int r = a + (((b - a) * yoff + 8) >> 4);
277         const int avg =
278             ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
279                                    sec16[w * y + x] * jcp_param->bck_offset,
280                                DIST_PRECISION_BITS);
281         const int diff = avg - src16[w * y + x];
282 
283         se += diff;
284         sse += diff * diff;
285       }
286     }
287   }
288   RoundHighBitDepth(bit_depth, &se, &sse);
289   *sse_ptr = static_cast<uint32_t>(sse);
290   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
291 }
292 
293 #if !CONFIG_REALTIME_ONLY
obmc_subpel_variance_ref(const uint8_t * pre,int l2w,int l2h,int xoff,int yoff,const int32_t * wsrc,const int32_t * mask,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)294 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
295                                          int xoff, int yoff,
296                                          const int32_t *wsrc,
297                                          const int32_t *mask, uint32_t *sse_ptr,
298                                          bool use_high_bit_depth_,
299                                          aom_bit_depth_t bit_depth) {
300   int64_t se = 0;
301   uint64_t sse = 0;
302   const int w = 1 << l2w;
303   const int h = 1 << l2h;
304 
305   xoff <<= 1;
306   yoff <<= 1;
307 
308   for (int y = 0; y < h; y++) {
309     for (int x = 0; x < w; x++) {
310       // Bilinear interpolation at a 16th pel step.
311       if (!use_high_bit_depth_) {
312         const int a1 = pre[(w + 1) * (y + 0) + x + 0];
313         const int a2 = pre[(w + 1) * (y + 0) + x + 1];
314         const int b1 = pre[(w + 1) * (y + 1) + x + 0];
315         const int b2 = pre[(w + 1) * (y + 1) + x + 1];
316         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
317         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
318         const int r = a + (((b - a) * yoff + 8) >> 4);
319         const int diff = ROUND_POWER_OF_TWO_SIGNED(
320             wsrc[w * y + x] - r * mask[w * y + x], 12);
321         se += diff;
322         sse += diff * diff;
323       } else {
324         uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
325         const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
326         const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
327         const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
328         const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
329         const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
330         const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
331         const int r = a + (((b - a) * yoff + 8) >> 4);
332         const int diff = ROUND_POWER_OF_TWO_SIGNED(
333             wsrc[w * y + x] - r * mask[w * y + x], 12);
334         se += diff;
335         sse += diff * diff;
336       }
337     }
338   }
339   RoundHighBitDepth(bit_depth, &se, &sse);
340   *sse_ptr = static_cast<uint32_t>(sse);
341   return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
342 }
343 #endif
344 
345 ////////////////////////////////////////////////////////////////////////////////
346 
347 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
348  public:
SumOfSquaresTest()349   SumOfSquaresTest() : func_(GetParam()) {}
350 
351   ~SumOfSquaresTest() override = default;
352 
353  protected:
354   void ConstTest();
355   void RefTest();
356 
357   SumOfSquaresFunction func_;
358   ACMRandom rnd_;
359 };
360 
ConstTest()361 void SumOfSquaresTest::ConstTest() {
362   int16_t mem[256];
363   unsigned int res;
364   for (int v = 0; v < 256; ++v) {
365     for (int i = 0; i < 256; ++i) {
366       mem[i] = v;
367     }
368     API_REGISTER_STATE_CHECK(res = func_(mem));
369     EXPECT_EQ(256u * (v * v), res);
370   }
371 }
372 
RefTest()373 void SumOfSquaresTest::RefTest() {
374   int16_t mem[256];
375   for (int i = 0; i < 100; ++i) {
376     for (int j = 0; j < 256; ++j) {
377       mem[j] = rnd_.Rand8() - rnd_.Rand8();
378     }
379 
380     const unsigned int expected = mb_ss_ref(mem);
381     unsigned int res;
382     API_REGISTER_STATE_CHECK(res = func_(mem));
383     EXPECT_EQ(expected, res);
384   }
385 }
386 
387 ////////////////////////////////////////////////////////////////////////////////
388 // Encapsulating struct to store the function to test along with
389 // some testing context.
390 // Can be used for MSE, SSE, Variance, etc.
391 
392 template <typename Func>
393 struct TestParams {
TestParams__anon1c7945d30111::TestParams394   TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
395              int bit_depth_value = 0)
396       : log2width(log2w), log2height(log2h), func(function) {
397     use_high_bit_depth = (bit_depth_value > 0);
398     if (use_high_bit_depth) {
399       bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
400     } else {
401       bit_depth = AOM_BITS_8;
402     }
403     width = 1 << log2width;
404     height = 1 << log2height;
405     block_size = width * height;
406     mask = (1u << bit_depth) - 1;
407   }
408 
409   int log2width, log2height;
410   int width, height;
411   int block_size;
412   Func func;
413   aom_bit_depth_t bit_depth;
414   bool use_high_bit_depth;
415   uint32_t mask;
416 };
417 
418 template <typename Func>
operator <<(std::ostream & os,const TestParams<Func> & p)419 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
420   return os << "width/height:" << p.width << "/" << p.height
421             << " function:" << reinterpret_cast<const void *>(p.func)
422             << " bit-depth:" << p.bit_depth;
423 }
424 
425 // Main class for testing a function type
426 template <typename FunctionType>
427 class MseWxHTestClass
428     : public ::testing::TestWithParam<TestParams<FunctionType> > {
429  public:
SetUp()430   void SetUp() override {
431     params_ = this->GetParam();
432 
433     rnd_.Reset(ACMRandom::DeterministicSeed());
434     src_ = reinterpret_cast<uint16_t *>(
435         aom_memalign(16, block_size() * sizeof(src_)));
436     dst_ = reinterpret_cast<uint8_t *>(
437         aom_memalign(16, block_size() * sizeof(dst_)));
438     ASSERT_NE(src_, nullptr);
439     ASSERT_NE(dst_, nullptr);
440   }
441 
TearDown()442   void TearDown() override {
443     aom_free(src_);
444     aom_free(dst_);
445     src_ = nullptr;
446     dst_ = nullptr;
447   }
448 
449  protected:
450   void RefMatchTestMse();
451   void SpeedTest();
452 
453  protected:
454   ACMRandom rnd_;
455   uint8_t *dst_;
456   uint16_t *src_;
457   TestParams<FunctionType> params_;
458 
459   // some relay helpers
block_size() const460   int block_size() const { return params_.block_size; }
width() const461   int width() const { return params_.width; }
height() const462   int height() const { return params_.height; }
d_stride() const463   int d_stride() const { return params_.width; }  // stride is same as width
s_stride() const464   int s_stride() const { return params_.width; }  // stride is same as width
465 };
466 
467 template <typename MseWxHFunctionType>
SpeedTest()468 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
469   aom_usec_timer ref_timer, test_timer;
470   double elapsed_time_c = 0;
471   double elapsed_time_simd = 0;
472   int run_time = 10000000;
473   int w = width();
474   int h = height();
475   int dstride = d_stride();
476   int sstride = s_stride();
477 
478   for (int k = 0; k < block_size(); ++k) {
479     dst_[k] = rnd_.Rand8();
480     src_[k] = rnd_.Rand8();
481   }
482   aom_usec_timer_start(&ref_timer);
483   for (int i = 0; i < run_time; i++) {
484     aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
485   }
486   aom_usec_timer_mark(&ref_timer);
487   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
488 
489   aom_usec_timer_start(&test_timer);
490   for (int i = 0; i < run_time; i++) {
491     params_.func(dst_, dstride, src_, sstride, w, h);
492   }
493   aom_usec_timer_mark(&test_timer);
494   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
495 
496   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
497          elapsed_time_c, elapsed_time_simd,
498          (elapsed_time_c / elapsed_time_simd));
499 }
500 
501 template <typename MseWxHFunctionType>
RefMatchTestMse()502 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
503   uint64_t mse_ref = 0;
504   uint64_t mse_mod = 0;
505   int w = width();
506   int h = height();
507   int dstride = d_stride();
508   int sstride = s_stride();
509 
510   for (int i = 0; i < 10; i++) {
511     for (int k = 0; k < block_size(); ++k) {
512       dst_[k] = rnd_.Rand8();
513       src_[k] = rnd_.Rand8();
514     }
515     API_REGISTER_STATE_CHECK(
516         mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
517     API_REGISTER_STATE_CHECK(
518         mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
519     EXPECT_EQ(mse_ref, mse_mod)
520         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
521   }
522 }
523 
524 template <typename FunctionType>
525 class Mse16xHTestClass
526     : public ::testing::TestWithParam<TestParams<FunctionType> > {
527  public:
528   // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
529   // maximum width 16 and maximum height 8.
530   int mem_size = 16 * 8;
SetUp()531   void SetUp() override {
532     params_ = this->GetParam();
533     rnd_.Reset(ACMRandom::DeterministicSeed());
534     src_ = reinterpret_cast<uint16_t *>(
535         aom_memalign(16, mem_size * sizeof(*src_)));
536     dst_ =
537         reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
538     ASSERT_NE(src_, nullptr);
539     ASSERT_NE(dst_, nullptr);
540   }
541 
TearDown()542   void TearDown() override {
543     aom_free(src_);
544     aom_free(dst_);
545     src_ = nullptr;
546     dst_ = nullptr;
547   }
548 
RandBool()549   uint8_t RandBool() {
550     const uint32_t value = rnd_.Rand8();
551     return (value & 0x1);
552   }
553 
554  protected:
555   void RefMatchExtremeTestMse();
556   void RefMatchTestMse();
557   void SpeedTest();
558 
559  protected:
560   ACMRandom rnd_;
561   uint8_t *dst_;
562   uint16_t *src_;
563   TestParams<FunctionType> params_;
564 
565   // some relay helpers
width() const566   int width() const { return params_.width; }
height() const567   int height() const { return params_.height; }
d_stride() const568   int d_stride() const { return params_.width; }
569 };
570 
571 template <typename Mse16xHFunctionType>
SpeedTest()572 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
573   aom_usec_timer ref_timer, test_timer;
574   double elapsed_time_c = 0.0;
575   double elapsed_time_simd = 0.0;
576   const int loop_count = 10000000;
577   const int w = width();
578   const int h = height();
579   const int dstride = d_stride();
580 
581   for (int k = 0; k < mem_size; ++k) {
582     dst_[k] = rnd_.Rand8();
583     // Right shift by 6 is done to generate more input in range of [0,255] than
584     // CDEF_VERY_LARGE
585     int rnd_i10 = rnd_.Rand16() >> 6;
586     src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
587   }
588 
589   aom_usec_timer_start(&ref_timer);
590   for (int i = 0; i < loop_count; i++) {
591     aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
592   }
593   aom_usec_timer_mark(&ref_timer);
594   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
595 
596   aom_usec_timer_start(&test_timer);
597   for (int i = 0; i < loop_count; i++) {
598     params_.func(dst_, dstride, src_, w, h);
599   }
600   aom_usec_timer_mark(&test_timer);
601   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
602 
603   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
604          height(), elapsed_time_c, elapsed_time_simd,
605          (elapsed_time_c / elapsed_time_simd));
606 }
607 
608 template <typename Mse16xHFunctionType>
RefMatchTestMse()609 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
610   uint64_t mse_ref = 0;
611   uint64_t mse_mod = 0;
612   const int w = width();
613   const int h = height();
614   const int dstride = d_stride();
615 
616   for (int i = 0; i < 10; i++) {
617     for (int k = 0; k < mem_size; ++k) {
618       dst_[k] = rnd_.Rand8();
619       // Right shift by 6 is done to generate more input in range of [0,255]
620       // than CDEF_VERY_LARGE
621       int rnd_i10 = rnd_.Rand16() >> 6;
622       src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
623     }
624 
625     API_REGISTER_STATE_CHECK(
626         mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
627     API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
628     EXPECT_EQ(mse_ref, mse_mod)
629         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
630   }
631 }
632 
633 template <typename Mse16xHFunctionType>
RefMatchExtremeTestMse()634 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
635   uint64_t mse_ref = 0;
636   uint64_t mse_mod = 0;
637   const int w = width();
638   const int h = height();
639   const int dstride = d_stride();
640   const int iter = 10;
641 
642   // Fill the buffers with extreme values
643   for (int i = 0; i < iter; i++) {
644     for (int k = 0; k < mem_size; ++k) {
645       dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
646       src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
647     }
648 
649     API_REGISTER_STATE_CHECK(
650         mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
651     API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
652     EXPECT_EQ(mse_ref, mse_mod)
653         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
654   }
655 }
656 
657 // Main class for testing a function type
658 template <typename FunctionType>
659 class MainTestClass
660     : public ::testing::TestWithParam<TestParams<FunctionType> > {
661  public:
SetUp()662   void SetUp() override {
663     params_ = this->GetParam();
664 
665     rnd_.Reset(ACMRandom::DeterministicSeed());
666     const size_t unit =
667         use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
668     src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
669     ref_ = new uint8_t[block_size() * unit];
670     ASSERT_NE(src_, nullptr);
671     ASSERT_NE(ref_, nullptr);
672     memset(src_, 0, block_size() * sizeof(src_[0]));
673     memset(ref_, 0, block_size() * sizeof(ref_[0]));
674     if (use_high_bit_depth()) {
675       // TODO(skal): remove!
676       src_ = CONVERT_TO_BYTEPTR(src_);
677       ref_ = CONVERT_TO_BYTEPTR(ref_);
678     }
679   }
680 
TearDown()681   void TearDown() override {
682     if (use_high_bit_depth()) {
683       // TODO(skal): remove!
684       src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
685       ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
686     }
687 
688     aom_free(src_);
689     delete[] ref_;
690     src_ = nullptr;
691     ref_ = nullptr;
692   }
693 
694  protected:
695   // We could sub-class MainTestClass into dedicated class for Variance
696   // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
697   // to access top class fields xxx. That's cumbersome, so for now we'll just
698   // implement the testing methods here:
699 
700   // Variance tests
701   void ZeroTest();
702   void RefTest();
703   void RefStrideTest();
704   void OneQuarterTest();
705   void SpeedTest();
706 
707   // SSE&SUM tests
708   void RefTestSseSum();
709   void MinTestSseSum();
710   void MaxTestSseSum();
711   void SseSum_SpeedTest();
712 
713   // SSE&SUM dual tests
714   void RefTestSseSumDual();
715   void MinTestSseSumDual();
716   void MaxTestSseSumDual();
717   void SseSum_SpeedTestDual();
718 
719   // MSE/SSE tests
720   void RefTestMse();
721   void RefTestSse();
722   void MaxTestMse();
723   void MaxTestSse();
724 
725  protected:
726   ACMRandom rnd_;
727   uint8_t *src_;
728   uint8_t *ref_;
729   TestParams<FunctionType> params_;
730 
731   // some relay helpers
use_high_bit_depth() const732   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const733   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const734   int block_size() const { return params_.block_size; }
width() const735   int width() const { return params_.width; }
height() const736   int height() const { return params_.height; }
mask() const737   uint32_t mask() const { return params_.mask; }
738 };
739 
740 ////////////////////////////////////////////////////////////////////////////////
741 // Tests related to variance.
742 
743 template <typename VarianceFunctionType>
ZeroTest()744 void MainTestClass<VarianceFunctionType>::ZeroTest() {
745   for (int i = 0; i <= 255; ++i) {
746     if (!use_high_bit_depth()) {
747       memset(src_, i, block_size());
748     } else {
749       uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
750       for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
751     }
752     for (int j = 0; j <= 255; ++j) {
753       if (!use_high_bit_depth()) {
754         memset(ref_, j, block_size());
755       } else {
756         uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
757         for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
758       }
759       unsigned int sse, var;
760       API_REGISTER_STATE_CHECK(
761           var = params_.func(src_, width(), ref_, width(), &sse));
762       EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
763     }
764   }
765 }
766 
767 template <typename VarianceFunctionType>
RefTest()768 void MainTestClass<VarianceFunctionType>::RefTest() {
769   for (int i = 0; i < 10; ++i) {
770     for (int j = 0; j < block_size(); j++) {
771       if (!use_high_bit_depth()) {
772         src_[j] = rnd_.Rand8();
773         ref_[j] = rnd_.Rand8();
774       } else {
775         CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
776         CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
777       }
778     }
779     unsigned int sse1, sse2, var1, var2;
780     const int stride = width();
781     API_REGISTER_STATE_CHECK(
782         var1 = params_.func(src_, stride, ref_, stride, &sse1));
783     var2 =
784         variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
785                      stride, &sse2, use_high_bit_depth(), params_.bit_depth);
786     EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
787     EXPECT_EQ(var1, var2) << "Error at test index: " << i;
788   }
789 }
790 
791 template <typename VarianceFunctionType>
RefStrideTest()792 void MainTestClass<VarianceFunctionType>::RefStrideTest() {
793   for (int i = 0; i < 10; ++i) {
794     const int ref_stride = (i & 1) * width();
795     const int src_stride = ((i >> 1) & 1) * width();
796     for (int j = 0; j < block_size(); j++) {
797       const int ref_ind = (j / width()) * ref_stride + j % width();
798       const int src_ind = (j / width()) * src_stride + j % width();
799       if (!use_high_bit_depth()) {
800         src_[src_ind] = rnd_.Rand8();
801         ref_[ref_ind] = rnd_.Rand8();
802       } else {
803         CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
804         CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
805       }
806     }
807     unsigned int sse1, sse2;
808     unsigned int var1, var2;
809 
810     API_REGISTER_STATE_CHECK(
811         var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
812     var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
813                         src_stride, ref_stride, &sse2, use_high_bit_depth(),
814                         params_.bit_depth);
815     EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
816     EXPECT_EQ(var1, var2) << "Error at test index: " << i;
817   }
818 }
819 
820 template <typename VarianceFunctionType>
OneQuarterTest()821 void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
822   const int half = block_size() / 2;
823   if (!use_high_bit_depth()) {
824     memset(src_, 255, block_size());
825     memset(ref_, 255, half);
826     memset(ref_ + half, 0, half);
827   } else {
828     aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
829     aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
830     aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
831   }
832   unsigned int sse, var, expected;
833   API_REGISTER_STATE_CHECK(
834       var = params_.func(src_, width(), ref_, width(), &sse));
835   expected = block_size() * 255 * 255 / 4;
836   EXPECT_EQ(expected, var);
837 }
838 
839 template <typename VarianceFunctionType>
SpeedTest()840 void MainTestClass<VarianceFunctionType>::SpeedTest() {
841   for (int j = 0; j < block_size(); j++) {
842     if (!use_high_bit_depth()) {
843       src_[j] = rnd_.Rand8();
844       ref_[j] = rnd_.Rand8();
845 #if CONFIG_AV1_HIGHBITDEPTH
846     } else {
847       CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
848       CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
849 #endif  // CONFIG_AV1_HIGHBITDEPTH
850     }
851   }
852   unsigned int sse;
853   const int stride = width();
854   int run_time = 1000000000 / block_size();
855   aom_usec_timer timer;
856   aom_usec_timer_start(&timer);
857   for (int i = 0; i < run_time; ++i) {
858     params_.func(src_, stride, ref_, stride, &sse);
859   }
860 
861   aom_usec_timer_mark(&timer);
862   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
863   printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
864 }
865 
866 template <typename GetSseSum8x8QuadFuncType>
RefTestSseSum()867 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
868   for (int i = 0; i < 10; ++i) {
869     for (int j = 0; j < block_size(); ++j) {
870       src_[j] = rnd_.Rand8();
871       ref_[j] = rnd_.Rand8();
872     }
873     unsigned int sse1[256] = { 0 };
874     unsigned int sse2[256] = { 0 };
875     unsigned int var1[256] = { 0 };
876     unsigned int var2[256] = { 0 };
877     int sum1[256] = { 0 };
878     int sum2[256] = { 0 };
879     unsigned int sse_tot_c = 0;
880     unsigned int sse_tot_simd = 0;
881     int sum_tot_c = 0;
882     int sum_tot_simd = 0;
883     const int stride = width();
884     int k = 0;
885 
886     for (int row = 0; row < height(); row += 8) {
887       for (int col = 0; col < width(); col += 32) {
888         API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
889                                               ref_ + stride * row + col, stride,
890                                               &sse1[k], &sum1[k], &sse_tot_simd,
891                                               &sum_tot_simd, &var1[k]));
892         aom_get_var_sse_sum_8x8_quad_c(
893             src_ + stride * row + col, stride, ref_ + stride * row + col,
894             stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
895         k += 4;
896       }
897     }
898     EXPECT_EQ(sse_tot_c, sse_tot_simd);
899     EXPECT_EQ(sum_tot_c, sum_tot_simd);
900     for (int p = 0; p < 256; p++) {
901       EXPECT_EQ(sse1[p], sse2[p]);
902       EXPECT_EQ(sum1[p], sum2[p]);
903       EXPECT_EQ(var1[p], var2[p]);
904     }
905   }
906 }
907 
908 template <typename GetSseSum8x8QuadFuncType>
MinTestSseSum()909 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
910   memset(src_, 0, block_size());
911   memset(ref_, 255, block_size());
912   unsigned int sse1[256] = { 0 };
913   unsigned int sse2[256] = { 0 };
914   unsigned int var1[256] = { 0 };
915   unsigned int var2[256] = { 0 };
916   int sum1[256] = { 0 };
917   int sum2[256] = { 0 };
918   unsigned int sse_tot_c = 0;
919   unsigned int sse_tot_simd = 0;
920   int sum_tot_c = 0;
921   int sum_tot_simd = 0;
922   const int stride = width();
923   int k = 0;
924 
925   for (int i = 0; i < height(); i += 8) {
926     for (int j = 0; j < width(); j += 32) {
927       API_REGISTER_STATE_CHECK(params_.func(
928           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
929           &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
930       aom_get_var_sse_sum_8x8_quad_c(
931           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
932           &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
933       k += 4;
934     }
935   }
936   EXPECT_EQ(sse_tot_simd, sse_tot_c);
937   EXPECT_EQ(sum_tot_simd, sum_tot_c);
938   for (int p = 0; p < 256; p++) {
939     EXPECT_EQ(sse1[p], sse2[p]);
940     EXPECT_EQ(sum1[p], sum2[p]);
941     EXPECT_EQ(var1[p], var2[p]);
942   }
943 }
944 
945 template <typename GetSseSum8x8QuadFuncType>
MaxTestSseSum()946 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
947   memset(src_, 255, block_size());
948   memset(ref_, 0, block_size());
949   unsigned int sse1[256] = { 0 };
950   unsigned int sse2[256] = { 0 };
951   unsigned int var1[256] = { 0 };
952   unsigned int var2[256] = { 0 };
953   int sum1[256] = { 0 };
954   int sum2[256] = { 0 };
955   unsigned int sse_tot_c = 0;
956   unsigned int sse_tot_simd = 0;
957   int sum_tot_c = 0;
958   int sum_tot_simd = 0;
959   const int stride = width();
960   int k = 0;
961 
962   for (int i = 0; i < height(); i += 8) {
963     for (int j = 0; j < width(); j += 32) {
964       API_REGISTER_STATE_CHECK(params_.func(
965           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
966           &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
967       aom_get_var_sse_sum_8x8_quad_c(
968           src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
969           &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
970       k += 4;
971     }
972   }
973   EXPECT_EQ(sse_tot_c, sse_tot_simd);
974   EXPECT_EQ(sum_tot_c, sum_tot_simd);
975 
976   for (int p = 0; p < 256; p++) {
977     EXPECT_EQ(sse1[p], sse2[p]);
978     EXPECT_EQ(sum1[p], sum2[p]);
979     EXPECT_EQ(var1[p], var2[p]);
980   }
981 }
982 
983 template <typename GetSseSum8x8QuadFuncType>
SseSum_SpeedTest()984 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
985   const int loop_count = 1000000000 / block_size();
986   for (int j = 0; j < block_size(); ++j) {
987     src_[j] = rnd_.Rand8();
988     ref_[j] = rnd_.Rand8();
989   }
990 
991   unsigned int sse1[4] = { 0 };
992   unsigned int sse2[4] = { 0 };
993   unsigned int var1[4] = { 0 };
994   unsigned int var2[4] = { 0 };
995   int sum1[4] = { 0 };
996   int sum2[4] = { 0 };
997   unsigned int sse_tot_c = 0;
998   unsigned int sse_tot_simd = 0;
999   int sum_tot_c = 0;
1000   int sum_tot_simd = 0;
1001   const int stride = width();
1002 
1003   aom_usec_timer timer;
1004   aom_usec_timer_start(&timer);
1005   for (int r = 0; r < loop_count; ++r) {
1006     for (int i = 0; i < height(); i += 8) {
1007       for (int j = 0; j < width(); j += 32) {
1008         aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
1009                                        ref_ + stride * i + j, stride, sse2,
1010                                        sum2, &sse_tot_c, &sum_tot_c, var2);
1011       }
1012     }
1013   }
1014   aom_usec_timer_mark(&timer);
1015   const double elapsed_time_ref =
1016       static_cast<double>(aom_usec_timer_elapsed(&timer));
1017 
1018   aom_usec_timer_start(&timer);
1019   for (int r = 0; r < loop_count; ++r) {
1020     for (int i = 0; i < height(); i += 8) {
1021       for (int j = 0; j < width(); j += 32) {
1022         params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
1023                      stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
1024       }
1025     }
1026   }
1027   aom_usec_timer_mark(&timer);
1028   const double elapsed_time_simd =
1029       static_cast<double>(aom_usec_timer_elapsed(&timer));
1030 
1031   printf(
1032       "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
1033       "gain=%lf \n",
1034       width(), height(), elapsed_time_ref, elapsed_time_simd,
1035       elapsed_time_ref / elapsed_time_simd);
1036 }
1037 
1038 template <typename GetSseSum16x16DualFuncType>
RefTestSseSumDual()1039 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
1040   for (int iter = 0; iter < 10; ++iter) {
1041     for (int idx = 0; idx < block_size(); ++idx) {
1042       src_[idx] = rnd_.Rand8();
1043       ref_[idx] = rnd_.Rand8();
1044     }
1045     unsigned int sse1[64] = { 0 };
1046     unsigned int sse2[64] = { 0 };
1047     unsigned int var1[64] = { 0 };
1048     unsigned int var2[64] = { 0 };
1049     unsigned int sse_tot_c = 0;
1050     unsigned int sse_tot_simd = 0;
1051     int sum_tot_c = 0;
1052     int sum_tot_simd = 0;
1053     const int stride = width();
1054     int k = 0;
1055 
1056     for (int row = 0; row < height(); row += 16) {
1057       for (int col = 0; col < width(); col += 32) {
1058         API_REGISTER_STATE_CHECK(params_.func(
1059             src_ + stride * row + col, stride, ref_ + stride * row + col,
1060             stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1061         aom_get_var_sse_sum_16x16_dual_c(
1062             src_ + stride * row + col, stride, ref_ + stride * row + col,
1063             stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1064         k += 2;
1065       }
1066     }
1067     EXPECT_EQ(sse_tot_c, sse_tot_simd);
1068     EXPECT_EQ(sum_tot_c, sum_tot_simd);
1069     for (int p = 0; p < 64; p++) {
1070       EXPECT_EQ(sse1[p], sse2[p]);
1071       EXPECT_EQ(sse_tot_simd, sse_tot_c);
1072       EXPECT_EQ(sum_tot_simd, sum_tot_c);
1073       EXPECT_EQ(var1[p], var2[p]);
1074     }
1075   }
1076 }
1077 
1078 template <typename GetSseSum16x16DualFuncType>
MinTestSseSumDual()1079 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
1080   memset(src_, 0, block_size());
1081   memset(ref_, 255, block_size());
1082   unsigned int sse1[64] = { 0 };
1083   unsigned int sse2[64] = { 0 };
1084   unsigned int var1[64] = { 0 };
1085   unsigned int var2[64] = { 0 };
1086   unsigned int sse_tot_c = 0;
1087   unsigned int sse_tot_simd = 0;
1088   int sum_tot_c = 0;
1089   int sum_tot_simd = 0;
1090   const int stride = width();
1091   int k = 0;
1092 
1093   for (int row = 0; row < height(); row += 16) {
1094     for (int col = 0; col < width(); col += 32) {
1095       API_REGISTER_STATE_CHECK(params_.func(
1096           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1097           &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1098       aom_get_var_sse_sum_16x16_dual_c(
1099           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1100           &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1101       k += 2;
1102     }
1103   }
1104   EXPECT_EQ(sse_tot_simd, sse_tot_c);
1105   EXPECT_EQ(sum_tot_simd, sum_tot_c);
1106   for (int p = 0; p < 64; p++) {
1107     EXPECT_EQ(sse1[p], sse2[p]);
1108     EXPECT_EQ(var1[p], var2[p]);
1109   }
1110 }
1111 
1112 template <typename GetSseSum16x16DualFuncType>
MaxTestSseSumDual()1113 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
1114   memset(src_, 255, block_size());
1115   memset(ref_, 0, block_size());
1116   unsigned int sse1[64] = { 0 };
1117   unsigned int sse2[64] = { 0 };
1118   unsigned int var1[64] = { 0 };
1119   unsigned int var2[64] = { 0 };
1120   unsigned int sse_tot_c = 0;
1121   unsigned int sse_tot_simd = 0;
1122   int sum_tot_c = 0;
1123   int sum_tot_simd = 0;
1124   const int stride = width();
1125   int k = 0;
1126 
1127   for (int row = 0; row < height(); row += 16) {
1128     for (int col = 0; col < width(); col += 32) {
1129       API_REGISTER_STATE_CHECK(params_.func(
1130           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1131           &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1132       aom_get_var_sse_sum_16x16_dual_c(
1133           src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1134           &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1135       k += 2;
1136     }
1137   }
1138   EXPECT_EQ(sse_tot_c, sse_tot_simd);
1139   EXPECT_EQ(sum_tot_c, sum_tot_simd);
1140 
1141   for (int p = 0; p < 64; p++) {
1142     EXPECT_EQ(sse1[p], sse2[p]);
1143     EXPECT_EQ(var1[p], var2[p]);
1144   }
1145 }
1146 
1147 template <typename GetSseSum16x16DualFuncType>
SseSum_SpeedTestDual()1148 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
1149   const int loop_count = 1000000000 / block_size();
1150   for (int idx = 0; idx < block_size(); ++idx) {
1151     src_[idx] = rnd_.Rand8();
1152     ref_[idx] = rnd_.Rand8();
1153   }
1154 
1155   unsigned int sse1[2] = { 0 };
1156   unsigned int sse2[2] = { 0 };
1157   unsigned int var1[2] = { 0 };
1158   unsigned int var2[2] = { 0 };
1159   unsigned int sse_tot_c = 0;
1160   unsigned int sse_tot_simd = 0;
1161   int sum_tot_c = 0;
1162   int sum_tot_simd = 0;
1163   const int stride = width();
1164 
1165   aom_usec_timer timer;
1166   aom_usec_timer_start(&timer);
1167   for (int r = 0; r < loop_count; ++r) {
1168     for (int row = 0; row < height(); row += 16) {
1169       for (int col = 0; col < width(); col += 32) {
1170         aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
1171                                          ref_ + stride * row + col, stride,
1172                                          sse2, &sse_tot_c, &sum_tot_c, var2);
1173       }
1174     }
1175   }
1176   aom_usec_timer_mark(&timer);
1177   const double elapsed_time_ref =
1178       static_cast<double>(aom_usec_timer_elapsed(&timer));
1179 
1180   aom_usec_timer_start(&timer);
1181   for (int r = 0; r < loop_count; ++r) {
1182     for (int row = 0; row < height(); row += 16) {
1183       for (int col = 0; col < width(); col += 32) {
1184         params_.func(src_ + stride * row + col, stride,
1185                      ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
1186                      &sum_tot_simd, var1);
1187       }
1188     }
1189   }
1190   aom_usec_timer_mark(&timer);
1191   const double elapsed_time_simd =
1192       static_cast<double>(aom_usec_timer_elapsed(&timer));
1193 
1194   printf(
1195       "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
1196       "\t "
1197       "gain=%lf \n",
1198       width(), height(), elapsed_time_ref, elapsed_time_simd,
1199       elapsed_time_ref / elapsed_time_simd);
1200 }
1201 
1202 ////////////////////////////////////////////////////////////////////////////////
1203 // Tests related to MSE / SSE.
1204 
1205 template <typename FunctionType>
RefTestMse()1206 void MainTestClass<FunctionType>::RefTestMse() {
1207   for (int i = 0; i < 10; ++i) {
1208     for (int j = 0; j < block_size(); ++j) {
1209       if (!use_high_bit_depth()) {
1210         src_[j] = rnd_.Rand8();
1211         ref_[j] = rnd_.Rand8();
1212 #if CONFIG_AV1_HIGHBITDEPTH
1213       } else {
1214         CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1215         CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1216 #endif  // CONFIG_AV1_HIGHBITDEPTH
1217       }
1218     }
1219     unsigned int sse1, sse2;
1220     const int stride = width();
1221     API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
1222     variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1223                  stride, &sse2, use_high_bit_depth(), params_.bit_depth);
1224     EXPECT_EQ(sse1, sse2);
1225   }
1226 }
1227 
1228 template <typename FunctionType>
RefTestSse()1229 void MainTestClass<FunctionType>::RefTestSse() {
1230   for (int i = 0; i < 10; ++i) {
1231     for (int j = 0; j < block_size(); ++j) {
1232       src_[j] = rnd_.Rand8();
1233       ref_[j] = rnd_.Rand8();
1234     }
1235     unsigned int sse2;
1236     unsigned int var1;
1237     const int stride = width();
1238     API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
1239     variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1240                  stride, &sse2, false, AOM_BITS_8);
1241     EXPECT_EQ(var1, sse2);
1242   }
1243 }
1244 
1245 template <typename FunctionType>
MaxTestMse()1246 void MainTestClass<FunctionType>::MaxTestMse() {
1247   int max_value = (1 << params_.bit_depth) - 1;
1248   if (!use_high_bit_depth()) {
1249     memset(src_, max_value, block_size());
1250     memset(ref_, 0, block_size());
1251 #if CONFIG_AV1_HIGHBITDEPTH
1252   } else {
1253     aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
1254     aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
1255 #endif  // CONFIG_AV1_HIGHBITDEPTH
1256   }
1257   unsigned int sse;
1258   API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
1259   unsigned int expected = (unsigned int)block_size() * max_value * max_value;
1260   switch (params_.bit_depth) {
1261     case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
1262     case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
1263     case AOM_BITS_8:
1264     default: break;
1265   }
1266   EXPECT_EQ(expected, sse);
1267 }
1268 
1269 template <typename FunctionType>
MaxTestSse()1270 void MainTestClass<FunctionType>::MaxTestSse() {
1271   memset(src_, 255, block_size());
1272   memset(ref_, 0, block_size());
1273   unsigned int var;
1274   API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
1275   const unsigned int expected = block_size() * 255 * 255;
1276   EXPECT_EQ(expected, var);
1277 }
1278 
1279 ////////////////////////////////////////////////////////////////////////////////
1280 
1281 using std::get;
1282 using std::make_tuple;
1283 using std::tuple;
1284 
1285 template <typename FunctionType>
1286 class SubpelVarianceTest
1287     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1288  public:
SetUp()1289   void SetUp() override {
1290     params_ = this->GetParam();
1291 
1292     rnd_.Reset(ACMRandom::DeterministicSeed());
1293     if (!use_high_bit_depth()) {
1294       src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1295       sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1296       ref_ = reinterpret_cast<uint8_t *>(
1297           aom_memalign(32, block_size() + width() + height() + 1));
1298     } else {
1299       src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1300           aom_memalign(32, block_size() * sizeof(uint16_t))));
1301       sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1302           aom_memalign(32, block_size() * sizeof(uint16_t))));
1303       ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
1304           32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
1305     }
1306     ASSERT_NE(src_, nullptr);
1307     ASSERT_NE(sec_, nullptr);
1308     ASSERT_NE(ref_, nullptr);
1309   }
1310 
TearDown()1311   void TearDown() override {
1312     if (!use_high_bit_depth()) {
1313       aom_free(src_);
1314       aom_free(ref_);
1315       aom_free(sec_);
1316     } else {
1317       aom_free(CONVERT_TO_SHORTPTR(src_));
1318       aom_free(CONVERT_TO_SHORTPTR(ref_));
1319       aom_free(CONVERT_TO_SHORTPTR(sec_));
1320     }
1321   }
1322 
1323  protected:
1324   void RefTest();
1325   void ExtremeRefTest();
1326   void SpeedTest();
1327 
1328   ACMRandom rnd_;
1329   uint8_t *src_;
1330   uint8_t *ref_;
1331   uint8_t *sec_;
1332   TestParams<FunctionType> params_;
1333   DIST_WTD_COMP_PARAMS jcp_param_;
1334 
1335   // some relay helpers
use_high_bit_depth() const1336   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1337   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1338   int block_size() const { return params_.block_size; }
width() const1339   int width() const { return params_.width; }
height() const1340   int height() const { return params_.height; }
mask() const1341   uint32_t mask() const { return params_.mask; }
1342 };
1343 
1344 template <typename SubpelVarianceFunctionType>
RefTest()1345 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
1346   for (int x = 0; x < 8; ++x) {
1347     for (int y = 0; y < 8; ++y) {
1348       if (!use_high_bit_depth()) {
1349         for (int j = 0; j < block_size(); j++) {
1350           src_[j] = rnd_.Rand8();
1351         }
1352         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1353           ref_[j] = rnd_.Rand8();
1354         }
1355       } else {
1356         for (int j = 0; j < block_size(); j++) {
1357           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1358         }
1359         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1360           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1361         }
1362       }
1363       unsigned int sse1, sse2;
1364       unsigned int var1;
1365       API_REGISTER_STATE_CHECK(
1366           var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1367       const unsigned int var2 = subpel_variance_ref(
1368           ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1369           use_high_bit_depth(), params_.bit_depth);
1370       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1371       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1372     }
1373   }
1374 }
1375 
1376 template <typename SubpelVarianceFunctionType>
ExtremeRefTest()1377 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
1378   // Compare against reference.
1379   // Src: Set the first half of values to 0, the second half to the maximum.
1380   // Ref: Set the first half of values to the maximum, the second half to 0.
1381   for (int x = 0; x < 8; ++x) {
1382     for (int y = 0; y < 8; ++y) {
1383       const int half = block_size() / 2;
1384       if (!use_high_bit_depth()) {
1385         memset(src_, 0, half);
1386         memset(src_ + half, 255, half);
1387         memset(ref_, 255, half);
1388         memset(ref_ + half, 0, half + width() + height() + 1);
1389       } else {
1390         aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
1391         aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
1392         aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
1393         aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
1394                      half + width() + height() + 1);
1395       }
1396       unsigned int sse1, sse2;
1397       unsigned int var1;
1398       API_REGISTER_STATE_CHECK(
1399           var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1400       const unsigned int var2 = subpel_variance_ref(
1401           ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1402           use_high_bit_depth(), params_.bit_depth);
1403       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1404       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1405     }
1406   }
1407 }
1408 
1409 template <typename SubpelVarianceFunctionType>
SpeedTest()1410 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
1411   if (!use_high_bit_depth()) {
1412     for (int j = 0; j < block_size(); j++) {
1413       src_[j] = rnd_.Rand8();
1414     }
1415     for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1416       ref_[j] = rnd_.Rand8();
1417     }
1418   } else {
1419     for (int j = 0; j < block_size(); j++) {
1420       CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1421     }
1422     for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1423       CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1424     }
1425   }
1426 
1427   unsigned int sse1, sse2;
1428   int run_time = 1000000000 / block_size();
1429   aom_usec_timer timer;
1430 
1431   aom_usec_timer_start(&timer);
1432   for (int i = 0; i < run_time; ++i) {
1433     int x = rnd_(8);
1434     int y = rnd_(8);
1435     params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
1436   }
1437   aom_usec_timer_mark(&timer);
1438 
1439   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1440 
1441   aom_usec_timer timer_c;
1442 
1443   aom_usec_timer_start(&timer_c);
1444   for (int i = 0; i < run_time; ++i) {
1445     int x = rnd_(8);
1446     int y = rnd_(8);
1447     subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
1448                         &sse2, use_high_bit_depth(), params_.bit_depth);
1449   }
1450   aom_usec_timer_mark(&timer_c);
1451 
1452   const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
1453 
1454   printf(
1455       "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
1456       width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
1457       elapsed_time_c / elapsed_time);
1458 }
1459 
1460 template <>
RefTest()1461 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
1462   for (int x = 0; x < 8; ++x) {
1463     for (int y = 0; y < 8; ++y) {
1464       if (!use_high_bit_depth()) {
1465         for (int j = 0; j < block_size(); j++) {
1466           src_[j] = rnd_.Rand8();
1467           sec_[j] = rnd_.Rand8();
1468         }
1469         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1470           ref_[j] = rnd_.Rand8();
1471         }
1472       } else {
1473         for (int j = 0; j < block_size(); j++) {
1474           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1475           CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1476         }
1477         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1478           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1479         }
1480       }
1481       uint32_t sse1, sse2;
1482       uint32_t var1, var2;
1483       API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
1484                                                    src_, width(), &sse1, sec_));
1485       var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
1486                                      params_.log2height, x, y, &sse2,
1487                                      use_high_bit_depth(), params_.bit_depth);
1488       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1489       EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1490     }
1491   }
1492 }
1493 
1494 template <>
RefTest()1495 void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() {
1496   for (int x = 0; x < 8; ++x) {
1497     for (int y = 0; y < 8; ++y) {
1498       if (!use_high_bit_depth()) {
1499         for (int j = 0; j < block_size(); j++) {
1500           src_[j] = rnd_.Rand8();
1501           sec_[j] = rnd_.Rand8();
1502         }
1503         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1504           ref_[j] = rnd_.Rand8();
1505         }
1506       } else {
1507         for (int j = 0; j < block_size(); j++) {
1508           CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1509           CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1510         }
1511         for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1512           CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1513         }
1514       }
1515       for (int x0 = 0; x0 < 2; ++x0) {
1516         for (int y0 = 0; y0 < 4; ++y0) {
1517           uint32_t sse1, sse2;
1518           uint32_t var1, var2;
1519           jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0];
1520           jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0];
1521           API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
1522                                                        src_, width(), &sse1,
1523                                                        sec_, &jcp_param_));
1524           var2 = dist_wtd_subpel_avg_variance_ref(
1525               ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
1526               &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
1527           EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1528           EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1529         }
1530       }
1531     }
1532   }
1533 }
1534 
1535 ////////////////////////////////////////////////////////////////////////////////
1536 
1537 #if !CONFIG_REALTIME_ONLY
1538 
1539 static const int kMaskMax = 64;
1540 
1541 typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
1542 
1543 template <typename FunctionType>
1544 class ObmcVarianceTest
1545     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1546  public:
SetUp()1547   void SetUp() override {
1548     params_ = this->GetParam();
1549 
1550     rnd_.Reset(ACMRandom::DeterministicSeed());
1551     if (!use_high_bit_depth()) {
1552       pre_ = reinterpret_cast<uint8_t *>(
1553           aom_memalign(32, block_size() + width() + height() + 1));
1554     } else {
1555       pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
1556           32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
1557     }
1558     wsrc_ = reinterpret_cast<int32_t *>(
1559         aom_memalign(32, block_size() * sizeof(uint32_t)));
1560     mask_ = reinterpret_cast<int32_t *>(
1561         aom_memalign(32, block_size() * sizeof(uint32_t)));
1562     ASSERT_NE(pre_, nullptr);
1563     ASSERT_NE(wsrc_, nullptr);
1564     ASSERT_NE(mask_, nullptr);
1565   }
1566 
TearDown()1567   void TearDown() override {
1568     if (!use_high_bit_depth()) {
1569       aom_free(pre_);
1570     } else {
1571       aom_free(CONVERT_TO_SHORTPTR(pre_));
1572     }
1573     aom_free(wsrc_);
1574     aom_free(mask_);
1575   }
1576 
1577  protected:
1578   void RefTest();
1579   void ExtremeRefTest();
1580   void SpeedTest();
1581 
1582   ACMRandom rnd_;
1583   uint8_t *pre_;
1584   int32_t *wsrc_;
1585   int32_t *mask_;
1586   TestParams<FunctionType> params_;
1587 
1588   // some relay helpers
use_high_bit_depth() const1589   bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1590   int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1591   int block_size() const { return params_.block_size; }
width() const1592   int width() const { return params_.width; }
height() const1593   int height() const { return params_.height; }
bd_mask() const1594   uint32_t bd_mask() const { return params_.mask; }
1595 };
1596 
1597 template <>
RefTest()1598 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
1599   for (int x = 0; x < 8; ++x) {
1600     for (int y = 0; y < 8; ++y) {
1601       if (!use_high_bit_depth())
1602         for (int j = 0; j < block_size() + width() + height() + 1; j++)
1603           pre_[j] = rnd_.Rand8();
1604       else
1605         for (int j = 0; j < block_size() + width() + height() + 1; j++)
1606           CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1607       for (int j = 0; j < block_size(); j++) {
1608         wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1609         mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1610       }
1611 
1612       uint32_t sse1, sse2;
1613       uint32_t var1, var2;
1614       API_REGISTER_STATE_CHECK(
1615           var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1616       var2 = obmc_subpel_variance_ref(
1617           pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1618           &sse2, use_high_bit_depth(), params_.bit_depth);
1619       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1620       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1621     }
1622   }
1623 }
1624 
1625 template <>
ExtremeRefTest()1626 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
1627   // Pre: Set the first half of values to the maximum, the second half to 0.
1628   // Mask: same as above
1629   // WSrc: Set the first half of values to 0, the second half to the maximum.
1630   for (int x = 0; x < 8; ++x) {
1631     for (int y = 0; y < 8; ++y) {
1632       const int half = block_size() / 2;
1633       if (!use_high_bit_depth()) {
1634         memset(pre_, 255, half);
1635         memset(pre_ + half, 0, half + width() + height() + 1);
1636       } else {
1637         aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
1638         aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
1639                      half + width() + height() + 1);
1640       }
1641       for (int j = 0; j < half; j++) {
1642         wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
1643         mask_[j] = 0;
1644       }
1645       for (int j = half; j < block_size(); j++) {
1646         wsrc_[j] = 0;
1647         mask_[j] = kMaskMax * kMaskMax;
1648       }
1649 
1650       uint32_t sse1, sse2;
1651       uint32_t var1, var2;
1652       API_REGISTER_STATE_CHECK(
1653           var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1654       var2 = obmc_subpel_variance_ref(
1655           pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1656           &sse2, use_high_bit_depth(), params_.bit_depth);
1657       EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1658       EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1659     }
1660   }
1661 }
1662 
1663 template <>
SpeedTest()1664 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
1665   if (!use_high_bit_depth())
1666     for (int j = 0; j < block_size() + width() + height() + 1; j++)
1667       pre_[j] = rnd_.Rand8();
1668   else
1669     for (int j = 0; j < block_size() + width() + height() + 1; j++)
1670       CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1671   for (int j = 0; j < block_size(); j++) {
1672     wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1673     mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1674   }
1675   unsigned int sse1;
1676   const int stride = width() + 1;
1677   int run_time = 1000000000 / block_size();
1678   aom_usec_timer timer;
1679 
1680   aom_usec_timer_start(&timer);
1681   for (int i = 0; i < run_time; ++i) {
1682     int x = rnd_(8);
1683     int y = rnd_(8);
1684     API_REGISTER_STATE_CHECK(
1685         params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
1686   }
1687   aom_usec_timer_mark(&timer);
1688 
1689   const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1690   printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
1691          params_.bit_depth, elapsed_time);
1692 }
1693 
1694 #endif  // !CONFIG_REALTIME_ONLY
1695 
1696 typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest;
1697 typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest;
1698 typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
1699 typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
1700 typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest;
1701 typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest;
1702 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
1703 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
1704 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
1705     AvxDistWtdSubpelAvgVarianceTest;
1706 #if !CONFIG_REALTIME_ONLY
1707 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
1708 #endif
1709 typedef TestParams<MseWxH16bitFunc> MseWxHParams;
1710 typedef TestParams<Mse16xH16bitFunc> Mse16xHParams;
1711 
// Value-parameterized test definitions. Each test body only forwards to the
// fixture method it names. Tests whose name starts with DISABLED_ are the
// speed benchmarks; GoogleTest skips them unless disabled tests are requested.
TEST_P(MseWxHTest,RefMse)1712 TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); }
TEST_P(MseWxHTest,DISABLED_SpeedMse)1713 TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(Mse16xHTest,RefMse)1714 TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); }
TEST_P(Mse16xHTest,RefMseExtreme)1715 TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); }
TEST_P(Mse16xHTest,DISABLED_SpeedMse)1716 TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxMseTest,RefMse)1717 TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
TEST_P(AvxMseTest,MaxMse)1718 TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
TEST_P(AvxVarianceTest,Zero)1719 TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
TEST_P(AvxVarianceTest,Ref)1720 TEST_P(AvxVarianceTest, Ref) { RefTest(); }
TEST_P(AvxVarianceTest,RefStride)1721 TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
TEST_P(AvxVarianceTest,OneQuarter)1722 TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
TEST_P(AvxVarianceTest,DISABLED_Speed)1723 TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(GetSseSum8x8QuadTest,RefMseSum)1724 TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,MinSseSum)1725 TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,MaxMseSum)1726 TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest,DISABLED_Speed)1727 TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); }
TEST_P(GetSseSum16x16DualTest,RefMseSum)1728 TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,MinSseSum)1729 TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,MaxMseSum)1730 TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest,DISABLED_Speed)1731 TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); }
TEST_P(SumOfSquaresTest,Const)1732 TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
TEST_P(SumOfSquaresTest,Ref)1733 TEST_P(SumOfSquaresTest, Ref) { RefTest(); }
TEST_P(AvxSubpelVarianceTest,Ref)1734 TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxSubpelVarianceTest,ExtremeRef)1735 TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxSubpelVarianceTest,DISABLED_Speed)1736 TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxSubpelAvgVarianceTest,Ref)1737 TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(AvxDistWtdSubpelAvgVarianceTest,Ref)1738 TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
// The OBMC fixture alias only exists when CONFIG_REALTIME_ONLY is 0, so its
// tests sit behind the same guard.
1739 #if !CONFIG_REALTIME_ONLY
TEST_P(AvxObmcSubpelVarianceTest,Ref)1740 TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxObmcSubpelVarianceTest,ExtremeRef)1741 TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxObmcSubpelVarianceTest,DISABLED_Speed)1742 TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
1743 #endif
1744 
// Instantiate the MSE and sum-of-squares suites with the C implementations,
// under the instantiation prefix "C".
// NOTE(review): the first two arguments of each parameter set are presumably
// log2(width) and log2(height), and the trailing 8 presumably a bit depth;
// TestParams is defined outside this chunk, so confirm against it.
1745 INSTANTIATE_TEST_SUITE_P(
1746     C, MseWxHTest,
1747     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
1748                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
1749                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
1750                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));
1751 
1752 INSTANTIATE_TEST_SUITE_P(
1753     C, Mse16xHTest,
1754     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
1755                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
1756                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
1757                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));
1758 
// SumOfSquaresTest is parameterized directly on the function pointer.
1759 INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
1760                          ::testing::Values(aom_get_mb_ss_c));
1761 
// C MSE kernels. The leading two arguments track the block size in the
// function name as powers of two (4, 3 <-> 16x8).
1762 typedef TestParams<VarianceMxNFunc> MseParams;
1763 INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
1764                          ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
1765                                            MseParams(4, 3, &aom_mse16x8_c),
1766                                            MseParams(3, 4, &aom_mse8x16_c),
1767                                            MseParams(3, 3, &aom_mse8x8_c)));
1768 
// C variance kernels, one entry per block size. The leading two arguments
// track the block size in the function name as powers of two
// (7, 6 <-> 128x64).
1769 typedef TestParams<VarianceMxNFunc> VarianceParams;
1770 const VarianceParams kArrayVariance_c[] = {
1771   VarianceParams(7, 7, &aom_variance128x128_c),
1772   VarianceParams(7, 6, &aom_variance128x64_c),
1773   VarianceParams(6, 7, &aom_variance64x128_c),
1774   VarianceParams(6, 6, &aom_variance64x64_c),
1775   VarianceParams(6, 5, &aom_variance64x32_c),
1776   VarianceParams(5, 6, &aom_variance32x64_c),
1777   VarianceParams(5, 5, &aom_variance32x32_c),
1778   VarianceParams(5, 4, &aom_variance32x16_c),
1779   VarianceParams(4, 5, &aom_variance16x32_c),
1780   VarianceParams(4, 4, &aom_variance16x16_c),
1781   VarianceParams(4, 3, &aom_variance16x8_c),
1782   VarianceParams(3, 4, &aom_variance8x16_c),
1783   VarianceParams(3, 3, &aom_variance8x8_c),
1784   VarianceParams(3, 2, &aom_variance8x4_c),
1785   VarianceParams(2, 3, &aom_variance4x8_c),
1786   VarianceParams(2, 2, &aom_variance4x4_c),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
1787 #if !CONFIG_REALTIME_ONLY
1788   VarianceParams(6, 4, &aom_variance64x16_c),
1789   VarianceParams(4, 6, &aom_variance16x64_c),
1790   VarianceParams(5, 3, &aom_variance32x8_c),
1791   VarianceParams(3, 5, &aom_variance8x32_c),
1792   VarianceParams(4, 2, &aom_variance16x4_c),
1793   VarianceParams(2, 4, &aom_variance4x16_c),
1794 #endif
1795 };
1796 INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
1797                          ::testing::ValuesIn(kArrayVariance_c));
1798 
// C reference for the 8x8-quad SSE/sum helper; the same function is run with
// four different leading argument pairs.
// NOTE(review): the trailing 0 is presumably the bit-depth argument of
// TestParams; confirm against its definition.
1799 typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams;
1800 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
1801   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
1802   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
1803   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
1804   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
1805 };
1806 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
1807                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
1808 
// Same layout for the 16x16-dual SSE/sum helper.
1809 typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual;
1810 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
1811   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
1812   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
1813   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
1814   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
1815 };
1816 
1817 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
1818                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
1819 
// C sub-pixel variance kernels, one entry per block size. The leading two
// arguments track the block size in the function name as powers of two.
// NOTE(review): the trailing 0 is presumably the bit-depth argument of
// TestParams; confirm against its definition.
1820 typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
1821 const SubpelVarianceParams kArraySubpelVariance_c[] = {
1822   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
1823   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
1824   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
1825   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
1826   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
1827   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
1828   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
1829   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
1830   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
1831   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
1832   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
1833   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
1834   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
1835   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
1836   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
1837   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
1838 #if !CONFIG_REALTIME_ONLY
1839   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
1840   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
1841   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
1842   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
1843   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
1844   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
1845 #endif
1846 };
1847 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
1848                          ::testing::ValuesIn(kArraySubpelVariance_c));
1849 
// C sub-pixel average-variance kernels, one entry per block size. The leading
// two arguments track the block size in the function name as powers of two.
// NOTE(review): the trailing 0 is presumably the bit-depth argument of
// TestParams; confirm against its definition.
1850 typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
1851 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
1852   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
1853   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
1854   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
1855   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
1856   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
1857   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
1858   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
1859   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
1860   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
1861   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
1862   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
1863   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
1864   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
1865   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
1866   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
1867   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
1868 #if !CONFIG_REALTIME_ONLY
1869   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
1870   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
1871   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
1872   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
1873   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
1874   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
1875 #endif
1876 };
1877 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
1878                          ::testing::ValuesIn(kArraySubpelAvgVariance_c));
1879 
// C distance-weighted sub-pixel average-variance kernels. This table starts
// at 64x64; no 128-pixel block sizes are listed.
// NOTE(review): the trailing 0 is presumably the bit-depth argument of
// TestParams; confirm against its definition.
1880 typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
1881 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = {
1882   DistWtdSubpelAvgVarianceParams(
1883       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0),
1884   DistWtdSubpelAvgVarianceParams(
1885       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0),
1886   DistWtdSubpelAvgVarianceParams(
1887       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0),
1888   DistWtdSubpelAvgVarianceParams(
1889       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0),
1890   DistWtdSubpelAvgVarianceParams(
1891       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0),
1892   DistWtdSubpelAvgVarianceParams(
1893       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0),
1894   DistWtdSubpelAvgVarianceParams(
1895       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0),
1896   DistWtdSubpelAvgVarianceParams(4, 3,
1897                                  &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0),
1898   DistWtdSubpelAvgVarianceParams(3, 4,
1899                                  &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0),
1900   DistWtdSubpelAvgVarianceParams(3, 3,
1901                                  &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0),
1902   DistWtdSubpelAvgVarianceParams(3, 2,
1903                                  &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0),
1904   DistWtdSubpelAvgVarianceParams(2, 3,
1905                                  &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
1906   DistWtdSubpelAvgVarianceParams(2, 2,
1907                                  &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
// The 4:1 and 1:4 block sizes are only listed when CONFIG_REALTIME_ONLY is 0.
1908 #if !CONFIG_REALTIME_ONLY
1909 
1910   DistWtdSubpelAvgVarianceParams(
1911       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
1912   DistWtdSubpelAvgVarianceParams(
1913       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
1914   DistWtdSubpelAvgVarianceParams(5, 3,
1915                                  &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
1916   DistWtdSubpelAvgVarianceParams(3, 5,
1917                                  &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
1918   DistWtdSubpelAvgVarianceParams(4, 2,
1919                                  &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
1920   DistWtdSubpelAvgVarianceParams(2, 4,
1921                                  &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0),
1922 #endif
1923 };
1924 INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest,
1925                          ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c));
1926 
// C OBMC sub-pixel variance kernels, one entry per block size. The whole
// instantiation is compiled only when CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the trailing 0 is presumably the bit-depth argument of
// TestParams; confirm against its definition.
1927 #if !CONFIG_REALTIME_ONLY
1928 INSTANTIATE_TEST_SUITE_P(
1929     C, AvxObmcSubpelVarianceTest,
1930     ::testing::Values(
1931         ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
1932                                  0),
1933         ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
1934         ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
1935         ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
1936         ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
1937         ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
1938         ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
1939         ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
1940         ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
1941         ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
1942         ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
1943         ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
1944         ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
1945         ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
1946         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
1947         ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
1948 
1949         ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
1950         ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
1951         ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
1952         ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
1953         ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
1954         ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
1955 #endif
1956 
1957 #if CONFIG_AV1_HIGHBITDEPTH
// Signature of the high-bit-depth WxH MSE functions under test: two uint16_t
// buffers with their strides, followed by the block width and height.
using MseHBDWxH16bitFunc = uint64_t (*)(uint16_t *dst, int dstride,
                                        uint16_t *src, int sstride, int w,
                                        int h);
1961 
1962 template <typename FunctionType>
1963 class MseHBDWxHTestClass
1964     : public ::testing::TestWithParam<TestParams<FunctionType> > {
1965  public:
SetUp()1966   void SetUp() override {
1967     params_ = this->GetParam();
1968 
1969     rnd_.Reset(ACMRandom::DeterministicSeed());
1970     src_ = reinterpret_cast<uint16_t *>(
1971         aom_memalign(16, block_size() * sizeof(src_)));
1972     dst_ = reinterpret_cast<uint16_t *>(
1973         aom_memalign(16, block_size() * sizeof(dst_)));
1974     ASSERT_NE(src_, nullptr);
1975     ASSERT_NE(dst_, nullptr);
1976   }
1977 
TearDown()1978   void TearDown() override {
1979     aom_free(src_);
1980     aom_free(dst_);
1981     src_ = nullptr;
1982     dst_ = nullptr;
1983   }
1984 
1985  protected:
1986   void RefMatchTestMse();
1987   void SpeedTest();
1988 
1989  protected:
1990   ACMRandom rnd_;
1991   uint16_t *dst_;
1992   uint16_t *src_;
1993   TestParams<FunctionType> params_;
1994 
1995   // some relay helpers
block_size() const1996   int block_size() const { return params_.block_size; }
width() const1997   int width() const { return params_.width; }
d_stride() const1998   int d_stride() const { return params_.width; }  // stride is same as width
s_stride() const1999   int s_stride() const { return params_.width; }  // stride is same as width
height() const2000   int height() const { return params_.height; }
mask() const2001   int mask() const { return params_.mask; }
2002 };
2003 
2004 template <typename MseHBDWxHFunctionType>
SpeedTest()2005 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
2006   aom_usec_timer ref_timer, test_timer;
2007   double elapsed_time_c = 0;
2008   double elapsed_time_simd = 0;
2009   int run_time = 10000000;
2010   int w = width();
2011   int h = height();
2012   int dstride = d_stride();
2013   int sstride = s_stride();
2014   for (int k = 0; k < block_size(); ++k) {
2015     dst_[k] = rnd_.Rand16() & mask();
2016     src_[k] = rnd_.Rand16() & mask();
2017   }
2018   aom_usec_timer_start(&ref_timer);
2019   for (int i = 0; i < run_time; i++) {
2020     aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
2021   }
2022   aom_usec_timer_mark(&ref_timer);
2023   elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
2024 
2025   aom_usec_timer_start(&test_timer);
2026   for (int i = 0; i < run_time; i++) {
2027     params_.func(dst_, dstride, src_, sstride, w, h);
2028   }
2029   aom_usec_timer_mark(&test_timer);
2030   elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
2031 
2032   printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
2033          elapsed_time_c, elapsed_time_simd,
2034          (elapsed_time_c / elapsed_time_simd));
2035 }
2036 
2037 template <typename MseHBDWxHFunctionType>
RefMatchTestMse()2038 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
2039   uint64_t mse_ref = 0;
2040   uint64_t mse_mod = 0;
2041   int w = width();
2042   int h = height();
2043   int dstride = d_stride();
2044   int sstride = s_stride();
2045   for (int i = 0; i < 10; i++) {
2046     for (int k = 0; k < block_size(); ++k) {
2047       dst_[k] = rnd_.Rand16() & mask();
2048       src_[k] = rnd_.Rand16() & mask();
2049     }
2050     API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
2051                                  dst_, dstride, src_, sstride, w, h));
2052     API_REGISTER_STATE_CHECK(
2053         mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
2054     EXPECT_EQ(mse_ref, mse_mod)
2055         << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
2056   }
2057 }
2058 
2059 typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams;
2060 typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest;
2061 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
2062 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
2063 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
2064 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
2065 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
2066 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
2067     AvxHBDDistWtdSubpelAvgVarianceTest;
2068 #if !CONFIG_REALTIME_ONLY
2069 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
2070 #endif
2071 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
2072 
// High-bit-depth value-parameterized test definitions. Each test body only
// forwards to the fixture method it names; DISABLED_* tests are the speed
// benchmarks, which GoogleTest skips unless disabled tests are requested.
TEST_P(MseHBDWxHTest,RefMse)2073 TEST_P(MseHBDWxHTest, RefMse) { RefMatchTestMse(); }
TEST_P(MseHBDWxHTest,DISABLED_SpeedMse)2074 TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxHBDMseTest,RefMse)2075 TEST_P(AvxHBDMseTest, RefMse) { RefTestMse(); }
TEST_P(AvxHBDMseTest,MaxMse)2076 TEST_P(AvxHBDMseTest, MaxMse) { MaxTestMse(); }
TEST_P(AvxHBDMseTest,DISABLED_SpeedMse)2077 TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(AvxHBDVarianceTest,Zero)2078 TEST_P(AvxHBDVarianceTest, Zero) { ZeroTest(); }
TEST_P(AvxHBDVarianceTest,Ref)2079 TEST_P(AvxHBDVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDVarianceTest,RefStride)2080 TEST_P(AvxHBDVarianceTest, RefStride) { RefStrideTest(); }
TEST_P(AvxHBDVarianceTest,OneQuarter)2081 TEST_P(AvxHBDVarianceTest, OneQuarter) { OneQuarterTest(); }
TEST_P(AvxHBDVarianceTest,DISABLED_Speed)2082 TEST_P(AvxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxHBDSubpelVarianceTest,Ref)2083 TEST_P(AvxHBDSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDSubpelVarianceTest,ExtremeRef)2084 TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxHBDSubpelVarianceTest,DISABLED_Speed)2085 TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxHBDSubpelAvgVarianceTest,Ref)2086 TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest,Ref)2087 TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }
// The OBMC fixture alias only exists when CONFIG_REALTIME_ONLY is 0, so its
// tests sit behind the same guard.
2088 #if !CONFIG_REALTIME_ONLY
TEST_P(AvxHBDObmcSubpelVarianceTest,Ref)2089 TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxHBDObmcSubpelVarianceTest,ExtremeRef)2090 TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxHBDObmcSubpelVarianceTest,DISABLED_Speed)2091 TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
2092 #endif
2093 
// High-bit-depth MSE suites with the C implementations.
// NOTE(review): the trailing 10 passed to MseHBDWxHParams is presumably the
// bit depth; confirm against TestParams.
2094 INSTANTIATE_TEST_SUITE_P(
2095     C, MseHBDWxHTest,
2096     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2097                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
2098                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2099                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
2100 
// The last argument of each entry matches the bit depth in the function name
// (highbd_12 <-> 12, highbd_10 <-> 10, highbd_8 <-> 8).
2101 INSTANTIATE_TEST_SUITE_P(
2102     C, AvxHBDMseTest,
2103     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
2104                       MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
2105                       MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
2106                       MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
2107                       MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
2108                       MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
2109                       MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
2110                       MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
2111                       MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
2112                       MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
2113                       MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
2114                       MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
2115 
// Arm instantiations of the high-bit-depth MSE suites. Each group is compiled
// only when the matching HAVE_* macro is non-zero.
2116 #if HAVE_NEON
2117 INSTANTIATE_TEST_SUITE_P(
2118     NEON, MseHBDWxHTest,
2119     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
2120                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_neon, 10),
2121                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_neon, 10),
2122                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_neon,
2123                                       10)));
2124 
2125 INSTANTIATE_TEST_SUITE_P(
2126     NEON, AvxHBDMseTest,
2127     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_neon, 12),
2128                       MseParams(4, 3, &aom_highbd_12_mse16x8_neon, 12),
2129                       MseParams(3, 4, &aom_highbd_12_mse8x16_neon, 12),
2130                       MseParams(3, 3, &aom_highbd_12_mse8x8_neon, 12),
2131                       MseParams(4, 4, &aom_highbd_10_mse16x16_neon, 10),
2132                       MseParams(4, 3, &aom_highbd_10_mse16x8_neon, 10),
2133                       MseParams(3, 4, &aom_highbd_10_mse8x16_neon, 10),
2134                       MseParams(3, 3, &aom_highbd_10_mse8x8_neon, 10),
2135                       MseParams(4, 4, &aom_highbd_8_mse16x16_neon, 8),
2136                       MseParams(4, 3, &aom_highbd_8_mse16x8_neon, 8),
2137                       MseParams(3, 4, &aom_highbd_8_mse8x16_neon, 8),
2138                       MseParams(3, 3, &aom_highbd_8_mse8x8_neon, 8)));
2139 #endif  // HAVE_NEON
2140 
// Only the highbd_8 MSE kernels are listed for NEON_DOTPROD.
2141 #if HAVE_NEON_DOTPROD
2142 INSTANTIATE_TEST_SUITE_P(
2143     NEON_DOTPROD, AvxHBDMseTest,
2144     ::testing::Values(MseParams(4, 4, &aom_highbd_8_mse16x16_neon_dotprod, 8),
2145                       MseParams(4, 3, &aom_highbd_8_mse16x8_neon_dotprod, 8),
2146                       MseParams(3, 4, &aom_highbd_8_mse8x16_neon_dotprod, 8),
2147                       MseParams(3, 3, &aom_highbd_8_mse8x8_neon_dotprod, 8)));
2148 #endif  // HAVE_NEON_DOTPROD
2149 
// For SVE, AvxHBDMseTest lists the highbd_12 and highbd_10 kernels only.
2150 #if HAVE_SVE
2151 INSTANTIATE_TEST_SUITE_P(
2152     SVE, MseHBDWxHTest,
2153     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
2154                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sve, 10),
2155                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sve, 10),
2156                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sve,
2157                                       10)));
2158 
2159 INSTANTIATE_TEST_SUITE_P(
2160     SVE, AvxHBDMseTest,
2161     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sve, 12),
2162                       MseParams(4, 3, &aom_highbd_12_mse16x8_sve, 12),
2163                       MseParams(3, 4, &aom_highbd_12_mse8x16_sve, 12),
2164                       MseParams(3, 3, &aom_highbd_12_mse8x8_sve, 12),
2165                       MseParams(4, 4, &aom_highbd_10_mse16x16_sve, 10),
2166                       MseParams(4, 3, &aom_highbd_10_mse16x8_sve, 10),
2167                       MseParams(3, 4, &aom_highbd_10_mse8x16_sve, 10),
2168                       MseParams(3, 3, &aom_highbd_10_mse8x8_sve, 10)));
2169 #endif  // HAVE_SVE
2170 
// Parameter table for the "C" instantiation of AvxHBDVarianceTest: the
// high-bitdepth variance kernels with the _c suffix, grouped as 12-bit, then
// 10-bit, then 8-bit.
// In every row the first two arguments equal log2 of the width and height in
// the kernel name (e.g. 7, 6 <-> 128x64), and the last argument equals the bit
// depth in the kernel name.
2171 const VarianceParams kArrayHBDVariance_c[] = {
2172   VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
2173   VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
2174   VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
2175   VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
2176   VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
2177   VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
2178   VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
2179   VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
2180   VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
2181   VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
2182   VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
2183   VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
2184   VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
2185   VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
2186   VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
2187   VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
2188   VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
2189   VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
2190   VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
2191   VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
2192   VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
2193   VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
2194   VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
2195   VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
2196   VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
2197   VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
2198   VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
2199   VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
2200   VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
2201   VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
2202   VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
2203   VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
2204   VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
2205   VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
2206   VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
2207   VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
2208   VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
2209   VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
2210   VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
2211   VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
2212   VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
2213   VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
2214   VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
2215   VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
2216   VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
2217   VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
2218   VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
2219   VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
2220 #if !CONFIG_REALTIME_ONLY  // rows for the 4:1 and 1:4 block sizes
2221   VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
2222   VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
2223   VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
2224   VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
2225   VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
2226   VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
2227   VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
2228   VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
2229   VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
2230   VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
2231   VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
2232   VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
2233   VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
2234   VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
2235   VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
2236   VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
2237   VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
2238   VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
2239 #endif  // !CONFIG_REALTIME_ONLY
2240 };
// Registers every row of the table above under the "C" prefix.
2241 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
2242                          ::testing::ValuesIn(kArrayHBDVariance_c));
2243 
// SSE4.1 instantiation of AvxHBDVarianceTest; compiled only when HAVE_SSE4_1
// is non-zero. Only the 4x4 kernels are listed here, one per bit depth
// (8, 10, 12), each with size arguments (2, 2).
2244 #if HAVE_SSE4_1
2245 INSTANTIATE_TEST_SUITE_P(
2246     SSE4_1, AvxHBDVarianceTest,
2247     ::testing::Values(
2248         VarianceParams(2, 2, &aom_highbd_8_variance4x4_sse4_1, 8),
2249         VarianceParams(2, 2, &aom_highbd_10_variance4x4_sse4_1, 10),
2250         VarianceParams(2, 2, &aom_highbd_12_variance4x4_sse4_1, 12)));
2251 #endif  // HAVE_SSE4_1
2252 
// Parameter table for the "C" instantiation of AvxHBDSubpelVarianceTest: the
// high-bitdepth sub_pixel_variance kernels with the _c suffix, grouped as
// 8-bit, then 10-bit, then 12-bit.
// In every row the first two arguments equal log2 of the width and height in
// the kernel name, and the last argument equals the bit depth in the name.
2253 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
2254   SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
2255   SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
2256   SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
2257   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
2258   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
2259   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
2260   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
2261   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
2262   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
2263   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
2264   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
2265   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
2266   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
2267   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
2268   SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
2269   SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
2270   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
2271   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
2272   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
2273   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
2274   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
2275   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
2276   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
2277   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
2278   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
2279   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
2280   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
2281   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
2282   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
2283   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
2284   SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
2285   SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
2286   SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
2287   SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
2288   SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
2289   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
2290   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
2291   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
2292   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
2293   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
2294   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
2295   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
2296   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
2297   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
2298   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
2299   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
2300   SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
2301   SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
2302 #if !CONFIG_REALTIME_ONLY  // rows for the 4:1 and 1:4 block sizes
2303   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
2304   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
2305   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
2306   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
2307   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
2308   SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
2309   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
2310   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
2311   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
2312   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
2313   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
2314   SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
2315   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
2316   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
2317   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
2318   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
2319   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
2320   SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
2321 #endif  // !CONFIG_REALTIME_ONLY
2322 };
// Registers every row of the table above under the "C" prefix.
2323 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
2324                          ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
2325 
// Parameter table for the "C" instantiation of AvxHBDSubpelAvgVarianceTest:
// the high-bitdepth sub_pixel_avg_variance kernels with the _c suffix, grouped
// as 8-bit, then 10-bit, then 12-bit.
// In every row the first two arguments equal log2 of the width and height in
// the kernel name, and the last argument equals the bit depth in the name.
2326 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
2327   SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
2328                           8),
2329   SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
2330                           8),
2331   SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
2332                           8),
2333   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
2334   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
2335   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
2336   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
2337   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
2338   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
2339   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
2340   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
2341   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
2342   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
2343   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
2344   SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
2345   SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
2346   SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
2347                           10),
2348   SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
2349                           10),
2350   SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
2351                           10),
2352   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
2353                           10),
2354   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
2355                           10),
2356   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
2357                           10),
2358   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
2359                           10),
2360   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
2361                           10),
2362   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
2363                           10),
2364   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
2365                           10),
2366   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
2367                           10),
2368   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
2369                           10),
2370   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
2371   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
2372   SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
2373   SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
2374   SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
2375                           12),
2376   SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
2377                           12),
2378   SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
2379                           12),
2380   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
2381                           12),
2382   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
2383                           12),
2384   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
2385                           12),
2386   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
2387                           12),
2388   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
2389                           12),
2390   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
2391                           12),
2392   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
2393                           12),
2394   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
2395                           12),
2396   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
2397                           12),
2398   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
2399   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
2400   SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
2401   SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
2402 
2403 #if !CONFIG_REALTIME_ONLY  // rows for the 4:1 and 1:4 block sizes
2404   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
2405   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
2406   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
2407   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
2408   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
2409   SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
2410   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
2411                           10),
2412   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
2413                           10),
2414   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
2415                           10),
2416   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
2417                           10),
2418   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
2419                           10),
2420   SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
2421                           10),
2422   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
2423                           12),
2424   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
2425                           12),
2426   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
2427                           12),
2428   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
2429                           12),
2430   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
2431                           12),
2432   SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
2433                           12),
2434 #endif  // !CONFIG_REALTIME_ONLY
2435 };
// Registers every row of the table above under the "C" prefix.
2436 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
2437                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
2438 
// Parameter table for the "C" instantiation of
// AvxHBDDistWtdSubpelAvgVarianceTest: the high-bitdepth
// dist_wtd_sub_pixel_avg_variance kernels with the _c suffix, grouped as
// 8-bit, then 10-bit, then 12-bit.
// In every row the first two arguments equal log2 of the width and height in
// the kernel name, and the last argument equals the bit depth in the name.
2439 const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
2440   DistWtdSubpelAvgVarianceParams(
2441       7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
2442   DistWtdSubpelAvgVarianceParams(
2443       7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
2444   DistWtdSubpelAvgVarianceParams(
2445       6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
2446   DistWtdSubpelAvgVarianceParams(
2447       6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
2448   DistWtdSubpelAvgVarianceParams(
2449       6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
2450   DistWtdSubpelAvgVarianceParams(
2451       5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
2452   DistWtdSubpelAvgVarianceParams(
2453       5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
2454   DistWtdSubpelAvgVarianceParams(
2455       5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
2456   DistWtdSubpelAvgVarianceParams(
2457       4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
2458   DistWtdSubpelAvgVarianceParams(
2459       4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
2460   DistWtdSubpelAvgVarianceParams(
2461       4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
2462   DistWtdSubpelAvgVarianceParams(
2463       3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
2464   DistWtdSubpelAvgVarianceParams(
2465       3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
2466   DistWtdSubpelAvgVarianceParams(
2467       3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
2468   DistWtdSubpelAvgVarianceParams(
2469       2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
2470   DistWtdSubpelAvgVarianceParams(
2471       2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
2472   DistWtdSubpelAvgVarianceParams(
2473       7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
2474   DistWtdSubpelAvgVarianceParams(
2475       7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
2476   DistWtdSubpelAvgVarianceParams(
2477       6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
2478   DistWtdSubpelAvgVarianceParams(
2479       6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
2480   DistWtdSubpelAvgVarianceParams(
2481       6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
2482   DistWtdSubpelAvgVarianceParams(
2483       5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
2484   DistWtdSubpelAvgVarianceParams(
2485       5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
2486   DistWtdSubpelAvgVarianceParams(
2487       5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
2488   DistWtdSubpelAvgVarianceParams(
2489       4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
2490   DistWtdSubpelAvgVarianceParams(
2491       4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
2492   DistWtdSubpelAvgVarianceParams(
2493       4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
2494   DistWtdSubpelAvgVarianceParams(
2495       3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
2496   DistWtdSubpelAvgVarianceParams(
2497       3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
2498   DistWtdSubpelAvgVarianceParams(
2499       3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
2500   DistWtdSubpelAvgVarianceParams(
2501       2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
2502   DistWtdSubpelAvgVarianceParams(
2503       2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
2504   DistWtdSubpelAvgVarianceParams(
2505       7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
2506   DistWtdSubpelAvgVarianceParams(
2507       7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
2508   DistWtdSubpelAvgVarianceParams(
2509       6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
2510   DistWtdSubpelAvgVarianceParams(
2511       6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
2512   DistWtdSubpelAvgVarianceParams(
2513       6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
2514   DistWtdSubpelAvgVarianceParams(
2515       5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
2516   DistWtdSubpelAvgVarianceParams(
2517       5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
2518   DistWtdSubpelAvgVarianceParams(
2519       5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
2520   DistWtdSubpelAvgVarianceParams(
2521       4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
2522   DistWtdSubpelAvgVarianceParams(
2523       4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
2524   DistWtdSubpelAvgVarianceParams(
2525       4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
2526   DistWtdSubpelAvgVarianceParams(
2527       3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
2528   DistWtdSubpelAvgVarianceParams(
2529       3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
2530   DistWtdSubpelAvgVarianceParams(
2531       3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
2532   DistWtdSubpelAvgVarianceParams(
2533       2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
2534   DistWtdSubpelAvgVarianceParams(
2535       2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
2536 
2537 #if !CONFIG_REALTIME_ONLY  // rows for the 4:1 and 1:4 block sizes
2538   DistWtdSubpelAvgVarianceParams(
2539       6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
2540   DistWtdSubpelAvgVarianceParams(
2541       4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
2542   DistWtdSubpelAvgVarianceParams(
2543       5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
2544   DistWtdSubpelAvgVarianceParams(
2545       3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
2546   DistWtdSubpelAvgVarianceParams(
2547       4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
2548   DistWtdSubpelAvgVarianceParams(
2549       2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
2550   DistWtdSubpelAvgVarianceParams(
2551       6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
2552   DistWtdSubpelAvgVarianceParams(
2553       4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
2554   DistWtdSubpelAvgVarianceParams(
2555       5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
2556   DistWtdSubpelAvgVarianceParams(
2557       3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
2558   DistWtdSubpelAvgVarianceParams(
2559       4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
2560   DistWtdSubpelAvgVarianceParams(
2561       2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
2562   DistWtdSubpelAvgVarianceParams(
2563       6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
2564   DistWtdSubpelAvgVarianceParams(
2565       4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
2566   DistWtdSubpelAvgVarianceParams(
2567       5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
2568   DistWtdSubpelAvgVarianceParams(
2569       3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
2570   DistWtdSubpelAvgVarianceParams(
2571       4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
2572   DistWtdSubpelAvgVarianceParams(
2573       2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
2574 #endif  // !CONFIG_REALTIME_ONLY
2575 };
// Registers every row of the table above under the "C" prefix.
2576 INSTANTIATE_TEST_SUITE_P(
2577     C, AvxHBDDistWtdSubpelAvgVarianceTest,
2578     ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
2579 
2580 #if !CONFIG_REALTIME_ONLY
2581 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
2582   ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
2583                            8),
2584   ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
2585                            8),
2586   ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
2587                            8),
2588   ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
2589                            8),
2590   ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
2591                            8),
2592   ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
2593                            8),
2594   ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
2595                            8),
2596   ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
2597                            8),
2598   ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
2599                            8),
2600   ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
2601                            8),
2602   ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
2603                            8),
2604   ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
2605                            8),
2606   ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
2607   ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
2608   ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
2609   ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
2610   ObmcSubpelVarianceParams(7, 7,
2611                            &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
2612   ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
2613                            10),
2614   ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
2615                            10),
2616   ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
2617                            10),
2618   ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
2619                            10),
2620   ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
2621                            10),
2622   ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
2623                            10),
2624   ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
2625                            10),
2626   ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
2627                            10),
2628   ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
2629                            10),
2630   ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
2631                            10),
2632   ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
2633                            10),
2634   ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
2635                            10),
2636   ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
2637                            10),
2638   ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
2639                            10),
2640   ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
2641                            10),
2642   ObmcSubpelVarianceParams(7, 7,
2643                            &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
2644   ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
2645                            12),
2646   ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
2647                            12),
2648   ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
2649                            12),
2650   ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
2651                            12),
2652   ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
2653                            12),
2654   ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
2655                            12),
2656   ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
2657                            12),
2658   ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
2659                            12),
2660   ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
2661                            12),
2662   ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
2663                            12),
2664   ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
2665                            12),
2666   ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
2667                            12),
2668   ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
2669                            12),
2670   ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
2671                            12),
2672   ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
2673                            12),
2674 
2675   ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
2676                            8),
2677   ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
2678                            8),
2679   ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
2680                            8),
2681   ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
2682                            8),
2683   ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
2684                            8),
2685   ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
2686                            8),
2687   ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
2688                            10),
2689   ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
2690                            10),
2691   ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
2692                            10),
2693   ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
2694                            10),
2695   ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
2696                            10),
2697   ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
2698                            10),
2699   ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
2700                            12),
2701   ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
2702                            12),
2703   ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
2704                            12),
2705   ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
2706                            12),
2707   ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
2708                            12),
2709   ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
2710                            12),
2711 };
// Registers AvxHBDObmcSubpelVarianceTest under the "C" prefix, producing one
// test instance per entry of kArrayHBDObmcSubpelVariance_c.
2712 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
2713                          ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
2714 #endif  // !CONFIG_REALTIME_ONLY
2715 #endif  // CONFIG_AV1_HIGHBITDEPTH
2716 
2717 #if HAVE_SSE2
// Registers MseWxHTest under the "SSE2" prefix with four parameter sets, all
// of which exercise aom_mse_wxh_16bit_sse2.
// NOTE(review): the leading (3|2, 3|2) pairs look like log2 width/height
// (8x8, 8x4, 4x8, 4x4) and the trailing 8 like a bit depth -- confirm against
// the MseWxHParams definition.
2718 INSTANTIATE_TEST_SUITE_P(
2719     SSE2, MseWxHTest,
2720     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
2721                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
2722                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
2723                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
2724 
// Registers Mse16xHTest under the "SSE2" prefix with four parameter sets, all
// of which exercise aom_mse_16xh_16bit_sse2.
// NOTE(review): the leading number pairs and the trailing 8 presumably carry
// the same meaning as in the MseWxHTest table (log2 size, bit depth) --
// confirm against the Mse16xHParams definition.
2725 INSTANTIATE_TEST_SUITE_P(
2726     SSE2, Mse16xHTest,
2727     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
2728                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
2729                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
2730                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
2731 
// Registers SumOfSquaresTest under the "SSE2" prefix with a single parameter:
// the aom_get_mb_ss_sse2 function itself.
2732 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
2733                          ::testing::Values(aom_get_mb_ss_sse2));
2734 
// Registers AvxMseTest under the "SSE2" prefix for the 16x16, 16x8, 8x16 and
// 8x8 SSE2 MSE kernels. In every entry the first two arguments equal log2 of
// the width and height that appear in the kernel's name.
2735 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
2736                          ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
2737                                            MseParams(4, 3, &aom_mse16x8_sse2),
2738                                            MseParams(3, 4, &aom_mse8x16_sse2),
2739                                            MseParams(3, 3, &aom_mse8x8_sse2)));
2740 
// Parameter table for the SSE2 variance kernels, from 128x128 down to 4x4.
// The first two arguments of each entry equal log2 of the width and height in
// the kernel's name. The 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16 entries are
// only compiled in when CONFIG_REALTIME_ONLY is 0.
// The table is then used to register AvxVarianceTest under the "SSE2" prefix.
2741 const VarianceParams kArrayVariance_sse2[] = {
2742   VarianceParams(7, 7, &aom_variance128x128_sse2),
2743   VarianceParams(7, 6, &aom_variance128x64_sse2),
2744   VarianceParams(6, 7, &aom_variance64x128_sse2),
2745   VarianceParams(6, 6, &aom_variance64x64_sse2),
2746   VarianceParams(6, 5, &aom_variance64x32_sse2),
2747   VarianceParams(5, 6, &aom_variance32x64_sse2),
2748   VarianceParams(5, 5, &aom_variance32x32_sse2),
2749   VarianceParams(5, 4, &aom_variance32x16_sse2),
2750   VarianceParams(4, 5, &aom_variance16x32_sse2),
2751   VarianceParams(4, 4, &aom_variance16x16_sse2),
2752   VarianceParams(4, 3, &aom_variance16x8_sse2),
2753   VarianceParams(3, 4, &aom_variance8x16_sse2),
2754   VarianceParams(3, 3, &aom_variance8x8_sse2),
2755   VarianceParams(3, 2, &aom_variance8x4_sse2),
2756   VarianceParams(2, 3, &aom_variance4x8_sse2),
2757   VarianceParams(2, 2, &aom_variance4x4_sse2),
2758 #if !CONFIG_REALTIME_ONLY
2759   VarianceParams(6, 4, &aom_variance64x16_sse2),
2760   VarianceParams(5, 3, &aom_variance32x8_sse2),
2761   VarianceParams(4, 6, &aom_variance16x64_sse2),
2762   VarianceParams(4, 2, &aom_variance16x4_sse2),
2763   VarianceParams(3, 5, &aom_variance8x32_sse2),
2764   VarianceParams(2, 4, &aom_variance4x16_sse2),
2765 #endif
2766 };
2767 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
2768                          ::testing::ValuesIn(kArrayVariance_sse2));
2769 
// Parameter table for aom_get_var_sse_sum_8x8_quad_sse2; every entry tests the
// same function with a different leading number pair and a trailing 0.
// NOTE(review): the pairs (7,7), (6,6), (5,5), (5,4) are presumably log2 of
// the tested region's width/height and the trailing 0 a bit-depth placeholder
// -- confirm against the GetSseSumParams definition.
// The table is then used to register GetSseSum8x8QuadTest under "SSE2".
2770 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
2771   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2772   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2773   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2774   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
2775 };
2776 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
2777                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
2778 
// Parameter table for aom_get_var_sse_sum_16x16_dual_sse2; every entry tests
// the same function with a different leading number pair and a trailing 0.
// NOTE(review): the pairs (7,7), (6,6), (5,5), (5,4) are presumably log2 of
// the tested region's width/height and the trailing 0 a bit-depth placeholder
// -- confirm against the GetSseSumParamsDual definition.
// The table is then used to register GetSseSum16x16DualTest under "SSE2".
2779 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
2780   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2781   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2782   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2783   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
2784 };
2785 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
2786                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
2787 
2788 #if CONFIG_AV1_HIGHBITDEPTH
2789 #if HAVE_SSE2
// Registers MseHBDWxHTest under the "SSE2" prefix with four parameter sets,
// all of which exercise aom_mse_wxh_16bit_highbd_sse2.
// NOTE(review): the leading pairs look like log2 width/height (8x8, 8x4, 4x8,
// 4x4) and the trailing 10 like a bit depth -- confirm against the
// MseHBDWxHParams definition.
2790 INSTANTIATE_TEST_SUITE_P(
2791     SSE2, MseHBDWxHTest,
2792     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2793                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
2794                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2795                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
2796                                       10)));
2797 #endif  // HAVE_SSE2
2798 #if HAVE_SSE4_1
// Registers the SSE4.1 high-bit-depth 4x4 sub-pixel variance kernels, one
// entry each for the 8-, 10- and 12-bit variants; the last argument of every
// entry matches the bit depth in the kernel's name.
// NOTE(review): these highbd kernels are attached to AvxSubpelVarianceTest,
// whereas the highbd SSE2/AVX2 tables in this file use
// AvxHBDSubpelVarianceTest -- confirm the suite name is intentional.
2799 INSTANTIATE_TEST_SUITE_P(
2800     SSE4_1, AvxSubpelVarianceTest,
2801     ::testing::Values(
2802         SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
2803                              8),
2804         SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
2805                              10),
2806         SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
2807                              12)));
2808 
// Registers the SSE4.1 high-bit-depth 4x4 sub-pixel average variance kernels,
// one entry each for the 8-, 10- and 12-bit variants; the last argument of
// every entry matches the bit depth in the kernel's name.
// NOTE(review): these highbd kernels are attached to AvxSubpelAvgVarianceTest,
// whereas the highbd SSE2 table in this file uses AvxHBDSubpelAvgVarianceTest
// -- confirm the suite name is intentional.
2809 INSTANTIATE_TEST_SUITE_P(
2810     SSE4_1, AvxSubpelAvgVarianceTest,
2811     ::testing::Values(
2812         SubpelAvgVarianceParams(2, 2,
2813                                 &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
2814                                 8),
2815         SubpelAvgVarianceParams(2, 2,
2816                                 &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
2817                                 10),
2818         SubpelAvgVarianceParams(2, 2,
2819                                 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
2820                                 12)));
2821 #endif  // HAVE_SSE4_1
2822 
// Registers AvxHBDMseTest under the "SSE2" prefix for the 16x16 and 8x8
// high-bit-depth MSE kernels at 12, 10 and 8 bits. The first two arguments
// equal log2 of the width/height in the kernel's name and the last argument
// matches the bit depth in the kernel's name.
2823 INSTANTIATE_TEST_SUITE_P(
2824     SSE2, AvxHBDMseTest,
2825     ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
2826                       MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
2827                       MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
2828                       MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
2829                       MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
2830                       MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
2831 
// Parameter table for the SSE2 high-bit-depth variance kernels, grouped by bit
// depth (12, then 10, then 8) and covering 128x128 down to 8x8. The first two
// arguments equal log2 of the width/height in the kernel's name and the last
// argument matches the bit depth in the kernel's name.
// The 64x16, 16x64, 32x8 and 8x32 entries are only compiled in when
// CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the 16x4 and 4x16 entries are commented out at every bit
// depth; presumably no SSE2 kernels exist for those sizes -- confirm.
// The table is then used to register AvxHBDVarianceTest under "SSE2".
2832 const VarianceParams kArrayHBDVariance_sse2[] = {
2833   VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
2834   VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
2835   VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
2836   VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
2837   VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
2838   VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
2839   VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
2840   VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
2841   VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
2842   VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
2843   VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
2844   VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
2845   VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
2846   VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
2847   VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
2848   VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
2849   VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
2850   VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
2851   VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
2852   VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
2853   VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
2854   VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
2855   VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
2856   VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
2857   VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
2858   VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
2859   VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
2860   VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
2861   VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
2862   VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
2863   VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
2864   VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
2865   VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
2866   VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
2867   VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
2868   VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
2869   VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
2870   VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
2871   VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
2872 #if !CONFIG_REALTIME_ONLY
2873   VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
2874   VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
2875   VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
2876   VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
2877   // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
2878   // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
2879   VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
2880   VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
2881   VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
2882   VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
2883   // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
2884   // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
2885   VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
2886   VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
2887   VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
2888   VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
2889 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
2890 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
2891 #endif
2892 };
2893 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
2894                          ::testing::ValuesIn(kArrayHBDVariance_sse2));
2895 
2896 #if HAVE_AVX2
2897 
// Registers MseHBDWxHTest under the "AVX2" prefix with four parameter sets,
// all of which exercise aom_mse_wxh_16bit_highbd_avx2.
// NOTE(review): the leading pairs look like log2 width/height (8x8, 8x4, 4x8,
// 4x4) and the trailing 10 like a bit depth -- confirm against the
// MseHBDWxHParams definition.
2898 INSTANTIATE_TEST_SUITE_P(
2899     AVX2, MseHBDWxHTest,
2900     ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2901                       MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
2902                       MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2903                       MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
2904                                       10)));
2905 
// Parameter table for the AVX2 high-bit-depth variance kernels. Only 10-bit
// kernels are listed, covering 128x128 down to 8x8. The first two arguments
// equal log2 of the width/height in the kernel's name. The 64x16, 32x8, 16x64
// and 8x32 entries are only compiled in when CONFIG_REALTIME_ONLY is 0.
2906 const VarianceParams kArrayHBDVariance_avx2[] = {
2907   VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
2908   VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
2909   VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
2910   VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
2911   VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
2912   VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
2913   VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
2914   VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
2915   VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
2916   VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
2917   VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
2918   VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
2919   VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
2920 #if !CONFIG_REALTIME_ONLY
2921   VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
2922   VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
2923   VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
2924   VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
2925 #endif
2926 };
2927 
// Registers AvxHBDVarianceTest under the "AVX2" prefix, producing one test
// instance per entry of kArrayHBDVariance_avx2.
2928 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
2929                          ::testing::ValuesIn(kArrayHBDVariance_avx2));
2930 
// Parameter table for the AVX2 high-bit-depth sub-pixel variance kernels. Only
// 10-bit kernels are listed, covering 128x128 down to 8x8. The first two
// arguments equal log2 of the width/height in the kernel's name.
2931 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
2932   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
2933   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
2934   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
2935   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
2936   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
2937   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
2938   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
2939   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
2940   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
2941   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
2942   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
2943   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
2944   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
2945 };
2946 
// Registers AvxHBDSubpelVarianceTest under the "AVX2" prefix, producing one
// test instance per entry of kArrayHBDSubpelVariance_avx2.
2947 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
2948                          ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
2949 #endif  // HAVE_AVX2
2950 
// Parameter table for the SSE2 high-bit-depth sub-pixel variance kernels,
// grouped by bit depth (12, then 10, then 8) and covering 128x128 down to 8x4.
// The first two arguments equal log2 of the width/height in the kernel's name
// and the last argument matches the bit depth in the kernel's name.
// The 64x16, 16x64, 32x8, 8x32 and 16x4 entries are only compiled in when
// CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the 4x16 entries are commented out at every bit depth;
// presumably no SSE2 kernels exist for that size -- confirm.
// The table is then used to register AvxHBDSubpelVarianceTest under "SSE2".
2951 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
2952   SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
2953   SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
2954   SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
2955   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
2956   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
2957   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
2958   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
2959   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
2960   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
2961   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
2962   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
2963   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
2964   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
2965   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
2966   SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
2967   SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
2968   SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
2969   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
2970   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
2971   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
2972   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
2973   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
2974   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
2975   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
2976   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
2977   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
2978   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
2979   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
2980   SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
2981   SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
2982   SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
2983   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
2984   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
2985   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
2986   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
2987   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
2988   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
2989   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
2990   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
2991   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
2992   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
2993   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
2994 #if !CONFIG_REALTIME_ONLY
2995   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
2996   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
2997   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
2998   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
2999   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
3000   // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
3001   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
3002   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
3003   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
3004   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
3005   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
3006   // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
3007   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
3008   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
3009   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
3010   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
3011   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
3012 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
3013 #endif
3014 };
3015 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
3016                          ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
3017 
// Parameter table for the SSE2 high-bit-depth sub-pixel average variance
// kernels, grouped by bit depth (12, then 10, then 8) and covering 64x64 down
// to 8x4. The first two arguments equal log2 of the width/height in the
// kernel's name and the last argument matches the bit depth in the kernel's
// name. The 64x16, 16x64, 32x8, 8x32 and 16x4 entries are only compiled in
// when CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the 4x16 entries are commented out at every bit depth;
// presumably no SSE2 kernels exist for that size -- confirm.
3018 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
3019   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
3020                           12),
3021   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
3022                           12),
3023   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
3024                           12),
3025   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
3026                           12),
3027   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
3028                           12),
3029   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
3030                           12),
3031   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
3032                           12),
3033   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
3034                           12),
3035   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
3036                           12),
3037   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
3038                           12),
3039   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
3040                           12),
3041   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
3042                           10),
3043   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
3044                           10),
3045   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
3046                           10),
3047   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
3048                           10),
3049   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
3050                           10),
3051   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
3052                           10),
3053   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
3054                           10),
3055   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
3056                           10),
3057   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
3058                           10),
3059   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
3060                           10),
3061   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
3062                           10),
3063   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
3064                           8),
3065   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
3066                           8),
3067   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
3068                           8),
3069   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
3070                           8),
3071   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
3072                           8),
3073   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
3074                           8),
3075   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
3076                           8),
3077   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
3078                           8),
3079   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
3080                           8),
3081   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
3082                           8),
3083   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
3084                           8),
3085 
3086 #if !CONFIG_REALTIME_ONLY
3087   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
3088                           12),
3089   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
3090                           12),
3091   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
3092                           12),
3093   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
3094                           12),
3095   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
3096                           12),
3097   // SubpelAvgVarianceParams(2, 4,
3098   // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
3099   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
3100                           10),
3101   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
3102                           10),
3103   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
3104                           10),
3105   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
3106                           10),
3107   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
3108                           10),
3109   // SubpelAvgVarianceParams(2, 4,
3110   // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
3111   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
3112                           8),
3113   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
3114                           8),
3115   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
3116                           8),
3117   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
3118                           8),
3119   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
3120                           8),
3121 // SubpelAvgVarianceParams(2, 4,
3122 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
3123 #endif
3124 };
3125 
// Registers AvxHBDSubpelAvgVarianceTest under the "SSE2" prefix, producing one
// test instance per entry of kArrayHBDSubpelAvgVariance_sse2.
3126 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
3127                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
3128 #endif  // CONFIG_AV1_HIGHBITDEPTH
3129 #endif  // HAVE_SSE2
3130 
3131 #if HAVE_SSSE3
// Parameter table for the SSSE3 sub-pixel variance kernels, from 128x128 down
// to 4x4. The first two arguments equal log2 of the width/height in the
// kernel's name; the last argument is 0 for every entry. The 64x16, 16x64,
// 32x8, 8x32, 16x4 and 4x16 entries are only compiled in when
// CONFIG_REALTIME_ONLY is 0.
// The table is then used to register AvxSubpelVarianceTest under "SSSE3".
3132 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
3133   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
3134   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
3135   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
3136   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
3137   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
3138   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
3139   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
3140   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
3141   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
3142   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
3143   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
3144   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
3145   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
3146   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
3147   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
3148   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
3149 #if !CONFIG_REALTIME_ONLY
3150   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
3151   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
3152   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
3153   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
3154   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
3155   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
3156 #endif
3157 };
3158 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
3159                          ::testing::ValuesIn(kArraySubpelVariance_ssse3));
3160 
// Parameter table for the SSSE3 sub-pixel average variance kernels, from
// 128x128 down to 4x4. The first two arguments equal log2 of the width/height
// in the kernel's name; the last argument is 0 for every entry. The 64x16,
// 16x64, 32x8, 8x32, 16x4 and 4x16 entries are only compiled in when
// CONFIG_REALTIME_ONLY is 0.
// The table is then used to register AvxSubpelAvgVarianceTest under "SSSE3".
3161 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
3162   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
3163   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
3164   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
3165   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
3166   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
3167   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
3168   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
3169   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
3170   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
3171   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
3172   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
3173   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
3174   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
3175   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
3176   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
3177   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
3178 #if !CONFIG_REALTIME_ONLY
3179   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
3180   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
3181   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
3182   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
3183   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
3184   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
3185 #endif
3186 };
3187 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
3188                          ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
3189 
3190 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = {
3191   DistWtdSubpelAvgVarianceParams(
3192       7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
3193   DistWtdSubpelAvgVarianceParams(
3194       7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
3195   DistWtdSubpelAvgVarianceParams(
3196       6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
3197   DistWtdSubpelAvgVarianceParams(
3198       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
3199   DistWtdSubpelAvgVarianceParams(
3200       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
3201   DistWtdSubpelAvgVarianceParams(
3202       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0),
3203   DistWtdSubpelAvgVarianceParams(
3204       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0),
3205   DistWtdSubpelAvgVarianceParams(
3206       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0),
3207   DistWtdSubpelAvgVarianceParams(
3208       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0),
3209   DistWtdSubpelAvgVarianceParams(
3210       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0),
3211   DistWtdSubpelAvgVarianceParams(
3212       4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0),
3213   DistWtdSubpelAvgVarianceParams(
3214       3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0),
3215   DistWtdSubpelAvgVarianceParams(
3216       3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0),
3217   DistWtdSubpelAvgVarianceParams(
3218       3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0),
3219   DistWtdSubpelAvgVarianceParams(
3220       2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
3221   DistWtdSubpelAvgVarianceParams(
3222       2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
3223 #if !CONFIG_REALTIME_ONLY
3224   DistWtdSubpelAvgVarianceParams(
3225       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
3226   DistWtdSubpelAvgVarianceParams(
3227       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
3228   DistWtdSubpelAvgVarianceParams(
3229       5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
3230   DistWtdSubpelAvgVarianceParams(
3231       3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
3232   DistWtdSubpelAvgVarianceParams(
3233       4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
3234   DistWtdSubpelAvgVarianceParams(
3235       2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0),
3236 #endif
3237 };
// Registers AvxDistWtdSubpelAvgVarianceTest under the "SSSE3" prefix, running
// it once for every entry of kArrayDistWtdSubpelAvgVariance_ssse3.
3238 INSTANTIATE_TEST_SUITE_P(
3239     SSSE3, AvxDistWtdSubpelAvgVarianceTest,
3240     ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3));
3241 #endif  // HAVE_SSSE3
3242 
3243 #if HAVE_SSE4_1
3244 #if !CONFIG_REALTIME_ONLY
// Registers AvxObmcSubpelVarianceTest under the "SSE4_1" prefix, one parameter
// set per SSE4.1 OBMC sub-pixel variance kernel. In every entry the first two
// arguments match log2 of the width and height in the kernel name (e.g. 7, 6
// pairs with 128x64).
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against the ObmcSubpelVarianceParams definition.
3245 INSTANTIATE_TEST_SUITE_P(
3246     SSE4_1, AvxObmcSubpelVarianceTest,
3247     ::testing::Values(
3248         ObmcSubpelVarianceParams(7, 7,
3249                                  &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
3250         ObmcSubpelVarianceParams(7, 6,
3251                                  &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
3252         ObmcSubpelVarianceParams(6, 7,
3253                                  &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
3254         ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
3255                                  0),
3256         ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
3257                                  0),
3258         ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
3259                                  0),
3260         ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
3261                                  0),
3262         ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
3263                                  0),
3264         ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
3265                                  0),
3266         ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
3267                                  0),
3268         ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
3269                                  0),
3270         ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
3271                                  0),
3272         ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
3273                                  0),
3274         ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
3275                                  0),
3276         ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
3277                                  0),
3278         ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
3279                                  0),
3280         ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
3281                                  0),
3282         ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
3283                                  0),
3284         ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
3285                                  0),
3286         ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
3287                                  0),
3288         ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
3289                                  0),
3290         ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
3291                                  0)));
3292 #endif
3293 #endif  // HAVE_SSE4_1
3294 
3295 #if HAVE_AVX2
3296 
// Registers MseWxHTest under the "AVX2" prefix. The same kernel,
// aom_mse_wxh_16bit_avx2, is exercised with four different size pairs.
// NOTE(review): the pairs look like log2 width/height (8x8, 8x4, 4x8, 4x4) and
// the trailing 8 like a bit depth -- confirm against MseWxHParams.
3297 INSTANTIATE_TEST_SUITE_P(
3298     AVX2, MseWxHTest,
3299     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
3300                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
3301                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
3302                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
3303 
// Registers Mse16xHTest under the "AVX2" prefix. The same kernel,
// aom_mse_16xh_16bit_avx2, is exercised with four different size pairs.
// NOTE(review): the pairs look like log2 width/height and the trailing 8 like
// a bit depth -- confirm against Mse16xHParams.
3304 INSTANTIATE_TEST_SUITE_P(
3305     AVX2, Mse16xHTest,
3306     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
3307                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
3308                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
3309                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
3310 
// Registers AvxMseTest under the "AVX2" prefix with a single parameter set:
// the 16x16 kernel aom_mse16x16_avx2 (4, 4 matches log2 of 16 and 16).
3311 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
3312                          ::testing::Values(MseParams(4, 4,
3313                                                      &aom_mse16x16_avx2)));
3314 
// Parameter table for the AVX2 variance kernels. The first two arguments of
// each entry match log2 of the width and height in the kernel name (e.g. 7, 6
// pairs with 128x64). Entries inside the !CONFIG_REALTIME_ONLY guard are only
// compiled when CONFIG_REALTIME_ONLY is 0.
3315 const VarianceParams kArrayVariance_avx2[] = {
3316   VarianceParams(7, 7, &aom_variance128x128_avx2),
3317   VarianceParams(7, 6, &aom_variance128x64_avx2),
3318   VarianceParams(6, 7, &aom_variance64x128_avx2),
3319   VarianceParams(6, 6, &aom_variance64x64_avx2),
3320   VarianceParams(6, 5, &aom_variance64x32_avx2),
3321   VarianceParams(5, 6, &aom_variance32x64_avx2),
3322   VarianceParams(5, 5, &aom_variance32x32_avx2),
3323   VarianceParams(5, 4, &aom_variance32x16_avx2),
3324   VarianceParams(4, 5, &aom_variance16x32_avx2),
3325   VarianceParams(4, 4, &aom_variance16x16_avx2),
3326   VarianceParams(4, 3, &aom_variance16x8_avx2),
3327 #if !CONFIG_REALTIME_ONLY
3328   VarianceParams(6, 4, &aom_variance64x16_avx2),
3329   VarianceParams(4, 6, &aom_variance16x64_avx2),
3330   VarianceParams(5, 3, &aom_variance32x8_avx2),
3331   VarianceParams(4, 2, &aom_variance16x4_avx2),
3332 #endif  // !CONFIG_REALTIME_ONLY
3333 };
// Runs AvxVarianceTest once per table entry under the "AVX2" prefix.
3334 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
3335                          ::testing::ValuesIn(kArrayVariance_avx2));
3336 
// Parameter table for GetSseSum8x8QuadTest: the single kernel
// aom_get_var_sse_sum_8x8_quad_avx2 is paired with four size settings.
// NOTE(review): the leading pairs presumably are log2 width/height of the
// tested region and the trailing 0 a bit-depth field -- confirm against
// GetSseSumParams.
3337 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
3338   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3339   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3340   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3341   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
3342 };
// Runs GetSseSum8x8QuadTest once per table entry under the "AVX2" prefix.
3343 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
3344                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
3345 
// Parameter table for GetSseSum16x16DualTest: the single kernel
// aom_get_var_sse_sum_16x16_dual_avx2 is paired with four size settings.
// NOTE(review): the leading pairs presumably are log2 width/height of the
// tested region and the trailing 0 a bit-depth field -- confirm against
// GetSseSumParamsDual.
3346 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
3347   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3348   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3349   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3350   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
3351 };
// Runs GetSseSum16x16DualTest once per table entry under the "AVX2" prefix.
3352 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
3353                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
3354 
// Parameter table for the AVX2 sub-pixel variance kernels. The first two
// arguments of each entry match log2 of the width and height in the kernel
// name. Only block widths of 16 and above are listed here.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against SubpelVarianceParams.
3355 const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
3356   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
3357   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
3358   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
3359   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
3360   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
3361   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
3362   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
3363   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
3364
3365   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
3366   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
3367   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
3368 #if !CONFIG_REALTIME_ONLY
3369   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
3370   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
3371 #endif  // !CONFIG_REALTIME_ONLY
3372 };
// Runs AvxSubpelVarianceTest once per table entry under the "AVX2" prefix.
3373 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
3374                          ::testing::ValuesIn(kArraySubpelVariance_avx2));
3375 
// Registers AvxSubpelAvgVarianceTest under the "AVX2" prefix for the AVX2
// sub-pixel average variance kernels listed below (widths 32 and above). The
// first two arguments of each entry match log2 of the width and height in the
// kernel name.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against SubpelAvgVarianceParams.
3376 INSTANTIATE_TEST_SUITE_P(
3377     AVX2, AvxSubpelAvgVarianceTest,
3378     ::testing::Values(
3379         SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
3380                                 0),
3381         SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
3382                                 0),
3383         SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
3384                                 0),
3385         SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
3386         SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
3387         SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
3388         SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
3389         SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
3390                                 0)));
3391 #endif  // HAVE_AVX2
3392 
3393 #if HAVE_NEON
// Registers MseWxHTest under the "NEON" prefix. The same kernel,
// aom_mse_wxh_16bit_neon, is exercised with four different size pairs.
// NOTE(review): the pairs look like log2 width/height (8x8, 8x4, 4x8, 4x4) and
// the trailing 8 like a bit depth -- confirm against MseWxHParams.
3394 INSTANTIATE_TEST_SUITE_P(
3395     NEON, MseWxHTest,
3396     ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
3397                       MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
3398                       MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
3399                       MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
3400 
// Registers Mse16xHTest under the "NEON" prefix. The same kernel,
// aom_mse_16xh_16bit_neon, is exercised with four different size pairs.
// NOTE(review): the pairs look like log2 width/height and the trailing 8 like
// a bit depth -- confirm against Mse16xHParams.
3401 INSTANTIATE_TEST_SUITE_P(
3402     NEON, Mse16xHTest,
3403     ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
3404                       Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
3405                       Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
3406                       Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
3407 
// Registers SumOfSquaresTest under the "NEON" prefix with aom_get_mb_ss_neon
// as its only parameter.
3408 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
3409                          ::testing::Values(aom_get_mb_ss_neon));
3410 
// Registers AvxMseTest under the "NEON" prefix for the 8x8, 8x16, 16x16 and
// 16x8 MSE kernels. The two leading arguments match log2 of the width and
// height in each kernel name.
3411 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
3412                          ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
3413                                            MseParams(3, 4, &aom_mse8x16_neon),
3414                                            MseParams(4, 4, &aom_mse16x16_neon),
3415                                            MseParams(4, 3, &aom_mse16x8_neon)));
3416 
// Parameter table for the NEON variance kernels. The first two arguments of
// each entry match log2 of the width and height in the kernel name (e.g. 7, 6
// pairs with 128x64). Every block size is listed exactly once so that no
// kernel is instantiated (and run) twice. Entries inside the
// !CONFIG_REALTIME_ONLY guard are only compiled when CONFIG_REALTIME_ONLY
// is 0.
3417 const VarianceParams kArrayVariance_neon[] = {
3418   VarianceParams(7, 7, &aom_variance128x128_neon),
3420   VarianceParams(7, 6, &aom_variance128x64_neon),
3421   VarianceParams(6, 7, &aom_variance64x128_neon),
3422   VarianceParams(6, 6, &aom_variance64x64_neon),
3423   VarianceParams(6, 5, &aom_variance64x32_neon),
3424   VarianceParams(5, 6, &aom_variance32x64_neon),
3425   VarianceParams(5, 5, &aom_variance32x32_neon),
3426   VarianceParams(5, 4, &aom_variance32x16_neon),
3427   VarianceParams(4, 5, &aom_variance16x32_neon),
3428   VarianceParams(4, 4, &aom_variance16x16_neon),
3429   VarianceParams(4, 3, &aom_variance16x8_neon),
3430   VarianceParams(3, 4, &aom_variance8x16_neon),
3431   VarianceParams(3, 3, &aom_variance8x8_neon),
3432   VarianceParams(3, 2, &aom_variance8x4_neon),
3433   VarianceParams(2, 3, &aom_variance4x8_neon),
3434   VarianceParams(2, 2, &aom_variance4x4_neon),
3435 #if !CONFIG_REALTIME_ONLY
3436   VarianceParams(2, 4, &aom_variance4x16_neon),
3437   VarianceParams(4, 2, &aom_variance16x4_neon),
3438   VarianceParams(3, 5, &aom_variance8x32_neon),
3439   VarianceParams(5, 3, &aom_variance32x8_neon),
3440   VarianceParams(4, 6, &aom_variance16x64_neon),
3441   VarianceParams(6, 4, &aom_variance64x16_neon),
3442 #endif  // !CONFIG_REALTIME_ONLY
3443 };
3444
// Runs AvxVarianceTest once per table entry under the "NEON" prefix.
3445 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
3446                          ::testing::ValuesIn(kArrayVariance_neon));
3447 
// Parameter table for the NEON sub-pixel variance kernels. The first two
// arguments of each entry match log2 of the width and height in the kernel
// name. Entries inside the !CONFIG_REALTIME_ONLY guard are only compiled when
// CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against SubpelVarianceParams.
3448 const SubpelVarianceParams kArraySubpelVariance_neon[] = {
3449   SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
3450   SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
3451   SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
3452   SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
3453   SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
3454   SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
3455   SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
3456   SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
3457   SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
3458   SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
3459   SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
3460   SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
3461   SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
3462   SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
3463   SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
3464   SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
3465 #if !CONFIG_REALTIME_ONLY
3466   SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
3467   SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
3468   SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
3469   SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
3470   SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
3471   SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
3472 #endif  // !CONFIG_REALTIME_ONLY
3473 };
// Runs AvxSubpelVarianceTest once per table entry under the "NEON" prefix.
3474 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
3475                          ::testing::ValuesIn(kArraySubpelVariance_neon));
3476 
// Parameter table for the NEON sub-pixel average variance kernels. The first
// two arguments of each entry match log2 of the width and height in the kernel
// name. Entries inside the !CONFIG_REALTIME_ONLY guard are only compiled when
// CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against SubpelAvgVarianceParams.
3477 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
3478   SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
3479   SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
3480   SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
3481   SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
3482   SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
3483   SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
3484   SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
3485   SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
3486   SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
3487   SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
3488   SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
3489   SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
3490   SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
3491   SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
3492   SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
3493   SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
3494 #if !CONFIG_REALTIME_ONLY
3495   SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
3496   SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
3497   SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
3498   SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
3499   SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
3500   SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
3501 #endif  // !CONFIG_REALTIME_ONLY
3502 };
// Runs AvxSubpelAvgVarianceTest once per table entry under the "NEON" prefix.
3503 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
3504                          ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
3505 
// Parameter table for the NEON distance-weighted sub-pixel average variance
// kernels. The first two arguments of each entry match log2 of the width and
// height in the kernel name. Entries inside the !CONFIG_REALTIME_ONLY guard
// are only compiled when CONFIG_REALTIME_ONLY is 0.
// NOTE(review): this table starts at 64x64 and has no 128-wide or 128-tall
// entries -- confirm whether that is intentional.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against DistWtdSubpelAvgVarianceParams.
3506 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = {
3507   DistWtdSubpelAvgVarianceParams(
3508       6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0),
3509   DistWtdSubpelAvgVarianceParams(
3510       6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0),
3511   DistWtdSubpelAvgVarianceParams(
3512       5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0),
3513   DistWtdSubpelAvgVarianceParams(
3514       5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0),
3515   DistWtdSubpelAvgVarianceParams(
3516       5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0),
3517   DistWtdSubpelAvgVarianceParams(
3518       4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0),
3519   DistWtdSubpelAvgVarianceParams(
3520       4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0),
3521   DistWtdSubpelAvgVarianceParams(
3522       4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0),
3523   DistWtdSubpelAvgVarianceParams(
3524       3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0),
3525   DistWtdSubpelAvgVarianceParams(
3526       3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0),
3527   DistWtdSubpelAvgVarianceParams(
3528       3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0),
3529   DistWtdSubpelAvgVarianceParams(
3530       2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0),
3531   DistWtdSubpelAvgVarianceParams(
3532       2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0),
3533 #if !CONFIG_REALTIME_ONLY
3534   DistWtdSubpelAvgVarianceParams(
3535       6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0),
3536   DistWtdSubpelAvgVarianceParams(
3537       4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0),
3538   DistWtdSubpelAvgVarianceParams(
3539       5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0),
3540   DistWtdSubpelAvgVarianceParams(
3541       3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0),
3542   DistWtdSubpelAvgVarianceParams(
3543       4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0),
3544   DistWtdSubpelAvgVarianceParams(
3545       2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0),
3546 #endif  // !CONFIG_REALTIME_ONLY
3547 };
// Runs AvxDistWtdSubpelAvgVarianceTest once per table entry under the "NEON"
// prefix.
3548 INSTANTIATE_TEST_SUITE_P(
3549     NEON, AvxDistWtdSubpelAvgVarianceTest,
3550     ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon));
3551 
3552 #if !CONFIG_REALTIME_ONLY
// Parameter table for the NEON OBMC sub-pixel variance kernels. The first two
// arguments of each entry match log2 of the width and height in the kernel
// name. The whole table sits inside a !CONFIG_REALTIME_ONLY guard, so it is
// only compiled when CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the trailing 0 is presumably the bit-depth field for the
// low-bitdepth path -- confirm against ObmcSubpelVarianceParams.
3553 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
3554   ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
3555   ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
3556   ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
3557   ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
3558   ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
3559   ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
3560   ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
3561   ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
3562   ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
3563   ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
3564   ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
3565   ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
3566   ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
3567   ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
3568   ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
3569   ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
3570   ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
3571   ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
3572   ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
3573   ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
3574   ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
3575   ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
3576 };
// Runs AvxObmcSubpelVarianceTest once per table entry under the "NEON" prefix.
3577 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
3578                          ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
3579 #endif
3580 
// Parameter table for GetSseSum8x8QuadTest: the single kernel
// aom_get_var_sse_sum_8x8_quad_neon is paired with four size settings.
// NOTE(review): the leading pairs presumably are log2 width/height of the
// tested region and the trailing 0 a bit-depth field -- confirm against
// GetSseSumParams.
3581 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
3582   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3583   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3584   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3585   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
3586 };
// Runs GetSseSum8x8QuadTest once per table entry under the "NEON" prefix.
3587 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
3588                          ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
3589 
// Parameter table for GetSseSum16x16DualTest: the single kernel
// aom_get_var_sse_sum_16x16_dual_neon is paired with four size settings.
// NOTE(review): the leading pairs presumably are log2 width/height of the
// tested region and the trailing 0 a bit-depth field -- confirm against
// GetSseSumParamsDual.
3590 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
3591   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3592   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3593   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3594   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
3595 };
// Runs GetSseSum16x16DualTest once per table entry under the "NEON" prefix.
3596 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
3597                          ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
3598 
3599 #if CONFIG_AV1_HIGHBITDEPTH
// Parameter table for the NEON high-bitdepth variance kernels. Entries are
// grouped by kernel family: highbd_12, then highbd_10, then highbd_8. The
// first two arguments of each entry match log2 of the width and height in the
// kernel name, and the last argument matches the bit depth in the kernel name
// (12, 10 or 8). Entries inside the !CONFIG_REALTIME_ONLY guard are only
// compiled when CONFIG_REALTIME_ONLY is 0.
3600 const VarianceParams kArrayHBDVariance_neon[] = {
3601   VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
3602   VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
3603   VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
3604   VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
3605   VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
3606   VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
3607   VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
3608   VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
3609   VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
3610   VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
3611   VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
3612   VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
3613   VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
3614   VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
3615   VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
3616   VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
3617   VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
3618   VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
3619   VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
3620   VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
3621   VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
3622   VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
3623   VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
3624   VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
3625   VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
3626   VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
3627   VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
3628   VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
3629   VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
3630   VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
3631   VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
3632   VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
3633   VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
3634   VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
3635   VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
3636   VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
3637   VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
3638   VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
3639   VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
3640   VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
3641   VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
3642   VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
3643   VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
3644   VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
3645   VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
3646   VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
3647   VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
3648   VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
3649 #if !CONFIG_REALTIME_ONLY
3650   VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
3651   VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
3652   VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
3653   VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
3654   VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
3655   VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
3656   VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
3657   VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
3658   VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
3659   VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
3660   VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
3661   VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
3662   VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
3663   VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
3664   VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
3665   VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
3666   VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
3667   VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
3668 #endif  // !CONFIG_REALTIME_ONLY
3669 };
3670
// Runs AvxHBDVarianceTest once per table entry under the "NEON" prefix.
3671 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
3672                          ::testing::ValuesIn(kArrayHBDVariance_neon));
3673 
// Parameter table for the NEON high-bitdepth sub-pixel variance kernels. The
// unconditional entries are grouped highbd_12, highbd_10, highbd_8; the
// !CONFIG_REALTIME_ONLY entries are grouped highbd_8, highbd_10, highbd_12.
// The first two arguments of each entry match log2 of the width and height in
// the kernel name, and the last argument matches the bit depth in the kernel
// name (12, 10 or 8).
// NOTE(review): this table starts at 64x64 and has no 128-wide or 128-tall
// entries -- confirm whether that is intentional.
3674 const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
3675   SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
3676   SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
3677   SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
3678   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
3679   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
3680   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
3681   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
3682   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
3683   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
3684   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
3685   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
3686   SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
3687   SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
3688   SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
3689   SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
3690   SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
3691   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
3692   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
3693   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
3694   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
3695   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
3696   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
3697   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
3698   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
3699   SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
3700   SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
3701   SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
3702   SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
3703   SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
3704   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
3705   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
3706   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
3707   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
3708   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
3709   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
3710   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
3711   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
3712   SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
3713   SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
3714 #if !CONFIG_REALTIME_ONLY
3715   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
3716   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
3717   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
3718   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
3719   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
3720   SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
3721   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
3722   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
3723   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
3724   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
3725   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
3726   SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
3727   SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
3728   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
3729   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
3730   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
3731   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
3732   SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
3733 #endif  // !CONFIG_REALTIME_ONLY
3734 };
3735
// Runs AvxHBDSubpelVarianceTest once per table entry under the "NEON" prefix.
3736 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
3737                          ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
3738 
// Parameter table for AvxHBDSubpelAvgVarianceTest listing the NEON
// high-bitdepth sub-pixel average variance functions.
// In every entry the first two arguments equal log2 of the block width and
// height that appear in the function name (e.g. 7, 6 <-> 128x64) and the last
// argument equals the bit depth (8, 10 or 12) that appears in the name.
// NOTE(review): the argument meanings are inferred from that naming pattern;
// confirm against the definition of SubpelAvgVarianceParams.
3739 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
  // 8-bit functions.
3740   SubpelAvgVarianceParams(7, 7,
3741                           &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
3742   SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
3743                           8),
3744   SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
3745                           8),
3746   SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
3747                           8),
3748   SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
3749                           8),
3750   SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
3751                           8),
3752   SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
3753                           8),
3754   SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
3755                           8),
3756   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
3757                           8),
3758   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
3759                           8),
3760   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
3761                           8),
3762   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
3763                           8),
3764   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
3765                           8),
3766   SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
3767                           8),
3768   SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
3769                           8),
3770   SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
3771                           8),
  // 10-bit functions.
3772   SubpelAvgVarianceParams(
3773       7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
3774   SubpelAvgVarianceParams(7, 6,
3775                           &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
3776   SubpelAvgVarianceParams(6, 7,
3777                           &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
3778   SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
3779                           10),
3780   SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
3781                           10),
3782   SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
3783                           10),
3784   SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
3785                           10),
3786   SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
3787                           10),
3788   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
3789                           10),
3790   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
3791                           10),
3792   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
3793                           10),
3794   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
3795                           10),
3796   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
3797                           10),
3798   SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
3799                           10),
3800   SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
3801                           10),
3802   SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
3803                           10),
  // 12-bit functions.
3804   SubpelAvgVarianceParams(
3805       7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
3806   SubpelAvgVarianceParams(7, 6,
3807                           &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
3808   SubpelAvgVarianceParams(6, 7,
3809                           &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
3810   SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
3811                           12),
3812   SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
3813                           12),
3814   SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
3815                           12),
3816   SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
3817                           12),
3818   SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
3819                           12),
3820   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
3821                           12),
3822   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
3823                           12),
3824   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
3825                           12),
3826   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
3827                           12),
3828   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
3829                           12),
3830   SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
3831                           12),
3832   SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
3833                           12),
3834   SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
3835                           12),
3836 
  // The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 entries (8-, 10- and 12-bit)
  // are only compiled in when CONFIG_REALTIME_ONLY is 0.
3837 #if !CONFIG_REALTIME_ONLY
3838   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
3839                           8),
3840   SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
3841                           8),
3842   SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
3843                           8),
3844   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
3845                           8),
3846   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
3847                           8),
3848   SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
3849                           8),
3850   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
3851                           10),
3852   SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
3853                           10),
3854   SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
3855                           10),
3856   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
3857                           10),
3858   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
3859                           10),
3860   SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
3861                           10),
3862   SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
3863                           12),
3864   SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
3865                           12),
3866   SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
3867                           12),
3868   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
3869                           12),
3870   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
3871                           12),
3872   SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
3873                           12),
3874 #endif  // !CONFIG_REALTIME_ONLY
3875 };
3876 
// Instantiates AvxHBDSubpelAvgVarianceTest once per entry of
// kArrayHBDSubpelAvgVariance_neon, using "NEON" as the instantiation name
// prefix.
3877 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
3878                          ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
3879 
// Parameter table for AvxHBDDistWtdSubpelAvgVarianceTest listing the NEON
// high-bitdepth distance-weighted sub-pixel average variance functions.
// In every entry the first two arguments equal log2 of the block width and
// height that appear in the function name and the last argument equals the
// bit depth (8, 10 or 12) that appears in the name.
// NOTE(review): the argument meanings are inferred from that naming pattern;
// confirm against the definition of DistWtdSubpelAvgVarianceParams.
3880 const DistWtdSubpelAvgVarianceParams
3881     kArrayHBDDistWtdSubpelAvgVariance_neon[] = {
      // 8-bit functions.
3882       DistWtdSubpelAvgVarianceParams(
3883           7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8),
3884       DistWtdSubpelAvgVarianceParams(
3885           7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8),
3886       DistWtdSubpelAvgVarianceParams(
3887           6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8),
3888       DistWtdSubpelAvgVarianceParams(
3889           6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8),
3890       DistWtdSubpelAvgVarianceParams(
3891           6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8),
3892       DistWtdSubpelAvgVarianceParams(
3893           5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8),
3894       DistWtdSubpelAvgVarianceParams(
3895           5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8),
3896       DistWtdSubpelAvgVarianceParams(
3897           5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8),
3898       DistWtdSubpelAvgVarianceParams(
3899           4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8),
3900       DistWtdSubpelAvgVarianceParams(
3901           4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8),
3902       DistWtdSubpelAvgVarianceParams(
3903           4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8),
3904       DistWtdSubpelAvgVarianceParams(
3905           3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8),
3906       DistWtdSubpelAvgVarianceParams(
3907           3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8),
3908       DistWtdSubpelAvgVarianceParams(
3909           3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8),
3910       DistWtdSubpelAvgVarianceParams(
3911           2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8),
3912       DistWtdSubpelAvgVarianceParams(
3913           2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8),
      // 10-bit functions.
3914       DistWtdSubpelAvgVarianceParams(
3915           7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10),
3916       DistWtdSubpelAvgVarianceParams(
3917           7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10),
3918       DistWtdSubpelAvgVarianceParams(
3919           6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10),
3920       DistWtdSubpelAvgVarianceParams(
3921           6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10),
3922       DistWtdSubpelAvgVarianceParams(
3923           6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10),
3924       DistWtdSubpelAvgVarianceParams(
3925           5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10),
3926       DistWtdSubpelAvgVarianceParams(
3927           5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10),
3928       DistWtdSubpelAvgVarianceParams(
3929           5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10),
3930       DistWtdSubpelAvgVarianceParams(
3931           4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10),
3932       DistWtdSubpelAvgVarianceParams(
3933           4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10),
3934       DistWtdSubpelAvgVarianceParams(
3935           4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10),
3936       DistWtdSubpelAvgVarianceParams(
3937           3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10),
3938       DistWtdSubpelAvgVarianceParams(
3939           3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10),
3940       DistWtdSubpelAvgVarianceParams(
3941           3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10),
3942       DistWtdSubpelAvgVarianceParams(
3943           2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10),
3944       DistWtdSubpelAvgVarianceParams(
3945           2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10),
      // 12-bit functions.
3946       DistWtdSubpelAvgVarianceParams(
3947           7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12),
3948       DistWtdSubpelAvgVarianceParams(
3949           7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12),
3950       DistWtdSubpelAvgVarianceParams(
3951           6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12),
3952       DistWtdSubpelAvgVarianceParams(
3953           6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12),
3954       DistWtdSubpelAvgVarianceParams(
3955           6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12),
3956       DistWtdSubpelAvgVarianceParams(
3957           5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12),
3958       DistWtdSubpelAvgVarianceParams(
3959           5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12),
3960       DistWtdSubpelAvgVarianceParams(
3961           5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12),
3962       DistWtdSubpelAvgVarianceParams(
3963           4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12),
3964       DistWtdSubpelAvgVarianceParams(
3965           4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12),
3966       DistWtdSubpelAvgVarianceParams(
3967           4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12),
3968       DistWtdSubpelAvgVarianceParams(
3969           3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12),
3970       DistWtdSubpelAvgVarianceParams(
3971           3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12),
3972       DistWtdSubpelAvgVarianceParams(
3973           3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12),
3974       DistWtdSubpelAvgVarianceParams(
3975           2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12),
3976       DistWtdSubpelAvgVarianceParams(
3977           2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12),
3978 
      // The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 entries (8-, 10- and
      // 12-bit) are only compiled in when CONFIG_REALTIME_ONLY is 0.
3979 #if !CONFIG_REALTIME_ONLY
3980       DistWtdSubpelAvgVarianceParams(
3981           6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8),
3982       DistWtdSubpelAvgVarianceParams(
3983           4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8),
3984       DistWtdSubpelAvgVarianceParams(
3985           5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8),
3986       DistWtdSubpelAvgVarianceParams(
3987           3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8),
3988       DistWtdSubpelAvgVarianceParams(
3989           4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8),
3990       DistWtdSubpelAvgVarianceParams(
3991           2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8),
3992       DistWtdSubpelAvgVarianceParams(
3993           6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10),
3994       DistWtdSubpelAvgVarianceParams(
3995           4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10),
3996       DistWtdSubpelAvgVarianceParams(
3997           5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10),
3998       DistWtdSubpelAvgVarianceParams(
3999           3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10),
4000       DistWtdSubpelAvgVarianceParams(
4001           4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10),
4002       DistWtdSubpelAvgVarianceParams(
4003           2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10),
4004       DistWtdSubpelAvgVarianceParams(
4005           6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12),
4006       DistWtdSubpelAvgVarianceParams(
4007           4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12),
4008       DistWtdSubpelAvgVarianceParams(
4009           5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12),
4010       DistWtdSubpelAvgVarianceParams(
4011           3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12),
4012       DistWtdSubpelAvgVarianceParams(
4013           4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12),
4014       DistWtdSubpelAvgVarianceParams(
4015           2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12),
4016 #endif  // !CONFIG_REALTIME_ONLY
4017     };
// Instantiates AvxHBDDistWtdSubpelAvgVarianceTest once per entry of
// kArrayHBDDistWtdSubpelAvgVariance_neon, using "NEON" as the instantiation
// name prefix.
4018 INSTANTIATE_TEST_SUITE_P(
4019     NEON, AvxHBDDistWtdSubpelAvgVarianceTest,
4020     ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon));
4021 
4022 #if !CONFIG_REALTIME_ONLY
// Parameter table for AvxHBDObmcSubpelVarianceTest listing the NEON
// high-bitdepth OBMC sub-pixel variance functions, grouped as 12-bit, then
// 10-bit, then 8-bit. Each group lists the sizes 128x128 down to 4x4 followed
// by 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16.
// In every entry the first two arguments equal log2 of the block width and
// height that appear in the function name and the last argument equals the
// bit depth that appears in the name.
// NOTE(review): the argument meanings are inferred from that naming pattern;
// confirm against the definition of ObmcSubpelVarianceParams.
// The whole table sits inside an enclosing #if !CONFIG_REALTIME_ONLY.
4023 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
  // 12-bit functions.
4024   ObmcSubpelVarianceParams(
4025       7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
4026   ObmcSubpelVarianceParams(
4027       7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
4028   ObmcSubpelVarianceParams(
4029       6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
4030   ObmcSubpelVarianceParams(
4031       6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
4032   ObmcSubpelVarianceParams(
4033       6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
4034   ObmcSubpelVarianceParams(
4035       5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
4036   ObmcSubpelVarianceParams(
4037       5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
4038   ObmcSubpelVarianceParams(
4039       5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
4040   ObmcSubpelVarianceParams(
4041       4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
4042   ObmcSubpelVarianceParams(
4043       4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
4044   ObmcSubpelVarianceParams(4, 3,
4045                            &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
4046   ObmcSubpelVarianceParams(3, 4,
4047                            &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
4048   ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
4049                            12),
4050   ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
4051                            12),
4052   ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
4053                            12),
4054   ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
4055                            12),
4056   ObmcSubpelVarianceParams(
4057       6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
4058   ObmcSubpelVarianceParams(
4059       4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
4060   ObmcSubpelVarianceParams(5, 3,
4061                            &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
4062   ObmcSubpelVarianceParams(3, 5,
4063                            &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
4064   ObmcSubpelVarianceParams(4, 2,
4065                            &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
4066   ObmcSubpelVarianceParams(2, 4,
4067                            &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
  // 10-bit functions.
4068   ObmcSubpelVarianceParams(
4069       7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
4070   ObmcSubpelVarianceParams(
4071       7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
4072   ObmcSubpelVarianceParams(
4073       6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
4074   ObmcSubpelVarianceParams(
4075       6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
4076   ObmcSubpelVarianceParams(
4077       6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
4078   ObmcSubpelVarianceParams(
4079       5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
4080   ObmcSubpelVarianceParams(
4081       5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
4082   ObmcSubpelVarianceParams(
4083       5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
4084   ObmcSubpelVarianceParams(
4085       4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
4086   ObmcSubpelVarianceParams(
4087       4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
4088   ObmcSubpelVarianceParams(4, 3,
4089                            &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
4090   ObmcSubpelVarianceParams(3, 4,
4091                            &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
4092   ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
4093                            10),
4094   ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
4095                            10),
4096   ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
4097                            10),
4098   ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
4099                            10),
4100   ObmcSubpelVarianceParams(
4101       6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
4102   ObmcSubpelVarianceParams(
4103       4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
4104   ObmcSubpelVarianceParams(5, 3,
4105                            &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
4106   ObmcSubpelVarianceParams(3, 5,
4107                            &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
4108   ObmcSubpelVarianceParams(4, 2,
4109                            &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
4110   ObmcSubpelVarianceParams(2, 4,
4111                            &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
  // 8-bit functions.
4112   ObmcSubpelVarianceParams(
4113       7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
4114   ObmcSubpelVarianceParams(7, 6,
4115                            &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
4116   ObmcSubpelVarianceParams(6, 7,
4117                            &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
4118   ObmcSubpelVarianceParams(6, 6,
4119                            &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
4120   ObmcSubpelVarianceParams(6, 5,
4121                            &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
4122   ObmcSubpelVarianceParams(5, 6,
4123                            &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
4124   ObmcSubpelVarianceParams(5, 5,
4125                            &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
4126   ObmcSubpelVarianceParams(5, 4,
4127                            &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
4128   ObmcSubpelVarianceParams(4, 5,
4129                            &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
4130   ObmcSubpelVarianceParams(4, 4,
4131                            &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
4132   ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
4133                            8),
4134   ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
4135                            8),
4136   ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
4137                            8),
4138   ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
4139                            8),
4140   ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
4141                            8),
4142   ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
4143                            8),
4144   ObmcSubpelVarianceParams(6, 4,
4145                            &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
4146   ObmcSubpelVarianceParams(4, 6,
4147                            &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
4148   ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
4149                            8),
4150   ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
4151                            8),
4152   ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
4153                            8),
4154   ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
4155                            8),
4156 };
4157 
// Instantiates AvxHBDObmcSubpelVarianceTest once per entry of
// kArrayHBDObmcSubpelVariance_neon, using "NEON" as the instantiation name
// prefix.
4158 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
4159                          ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
4160 #endif  // !CONFIG_REALTIME_ONLY
4161 
4162 #endif  // CONFIG_AV1_HIGHBITDEPTH
4163 
4164 #endif  // HAVE_NEON
4165 
4166 #if HAVE_NEON_DOTPROD
4167 
// Parameter table for AvxVarianceTest listing the NEON dot-product variance
// functions. In every entry the two integer arguments equal log2 of the block
// width and height that appear in the function name (e.g. 7, 6 <-> 128x64).
// NOTE(review): the argument meanings are inferred from that naming pattern;
// confirm against the definition of VarianceParams.
// Each block size is listed exactly once, from 128x128 down to 4x4; a repeated
// entry would only make the suite run the same function a second time.
4168 const VarianceParams kArrayVariance_neon_dotprod[] = {
4169   VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
4171   VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
4172   VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
4173   VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
4174   VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
4175   VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
4176   VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
4177   VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
4178   VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
4179   VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
4180   VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
4181   VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
4182   VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
4183   VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
4184   VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
4185   VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
  // The 4x16, 16x4, 8x32, 32x8, 16x64 and 64x16 entries are only compiled in
  // when CONFIG_REALTIME_ONLY is 0.
4186 #if !CONFIG_REALTIME_ONLY
4187   VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
4188   VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
4189   VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
4190   VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
4191   VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
4192   VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
4193 #endif  // !CONFIG_REALTIME_ONLY
4194 };
4195 
// Instantiates AvxVarianceTest once per entry of kArrayVariance_neon_dotprod,
// using "NEON_DOTPROD" as the instantiation name prefix.
4196 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
4197                          ::testing::ValuesIn(kArrayVariance_neon_dotprod));
4198 
// Parameter table for GetSseSum8x8QuadTest. Every entry exercises the same
// function, aom_get_var_sse_sum_8x8_quad_neon_dotprod, with a different pair
// of leading integer arguments (7,7 / 6,6 / 5,5 / 5,4) and a trailing 0.
// NOTE(review): the leading pair is presumably log2 width/height of the tested
// area and the trailing 0 a bit-depth placeholder — confirm against the
// definition of GetSseSumParams.
4199 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
4200   GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4201   GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4202   GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4203   GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
4204 };
// Instantiates GetSseSum8x8QuadTest once per entry of
// kArrayGetSseSum8x8Quad_neon_dotprod, using "NEON_DOTPROD" as the
// instantiation name prefix.
4205 INSTANTIATE_TEST_SUITE_P(
4206     NEON_DOTPROD, GetSseSum8x8QuadTest,
4207     ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
4208 
// Parameter table for GetSseSum16x16DualTest. Every entry exercises the same
// function, aom_get_var_sse_sum_16x16_dual_neon_dotprod, with a different pair
// of leading integer arguments (7,7 / 6,6 / 5,5 / 5,4) and a trailing 0.
// NOTE(review): the leading pair is presumably log2 width/height of the tested
// area and the trailing 0 a bit-depth placeholder — confirm against the
// definition of GetSseSumParamsDual.
4209 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
4210   GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4211   GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4212   GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4213   GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
4214 };
// Instantiates GetSseSum16x16DualTest once per entry of
// kArrayGetSseSum16x16Dual_neon_dotprod, using "NEON_DOTPROD" as the
// instantiation name prefix.
4215 INSTANTIATE_TEST_SUITE_P(
4216     NEON_DOTPROD, GetSseSum16x16DualTest,
4217     ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
4218 
// Instantiates AvxMseTest for the four NEON dot-product MSE functions (8x8,
// 8x16, 16x16, 16x8), using "NEON_DOTPROD" as the instantiation name prefix.
// The parameters are listed inline with ::testing::Values rather than through
// a named array. In each MseParams the two integers equal log2 of the block
// width and height that appear in the function name.
4219 INSTANTIATE_TEST_SUITE_P(
4220     NEON_DOTPROD, AvxMseTest,
4221     ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
4222                       MseParams(3, 4, &aom_mse8x16_neon_dotprod),
4223                       MseParams(4, 4, &aom_mse16x16_neon_dotprod),
4224                       MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
4225 
4226 #endif  // HAVE_NEON_DOTPROD
4227 
4228 #if HAVE_SVE
4229 
4230 #if CONFIG_AV1_HIGHBITDEPTH
// Parameter table for AvxHBDVarianceTest listing the SVE high-bitdepth
// variance functions, grouped as 12-bit, then 10-bit, then 8-bit.
// In every entry the first two arguments equal log2 of the block width and
// height that appear in the function name and the last argument equals the
// bit depth that appears in the name.
// NOTE(review): the argument meanings are inferred from that naming pattern;
// confirm against the definition of VarianceParams.
4231 const VarianceParams kArrayHBDVariance_sve[] = {
  // 12-bit functions.
4232   VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
4233   VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
4234   VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
4235   VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
4236   VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
4237   VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
4238   VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
4239   VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
4240   VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
4241   VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
4242   VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
4243   VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
4244   VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
4245   VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
4246   VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
4247   VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
  // 10-bit functions.
4248   VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
4249   VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
4250   VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
4251   VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
4252   VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
4253   VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
4254   VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
4255   VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
4256   VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
4257   VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
4258   VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
4259   VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
4260   VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
4261   VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
4262   VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
4263   VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
  // 8-bit functions.
4264   VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
4265   VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
4266   VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
4267   VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
4268   VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
4269   VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
4270   VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
4271   VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
4272   VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
4273   VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
4274   VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
4275   VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
4276   VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
4277   VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
4278   VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
4279   VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
  // The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 entries (12-, 10- and 8-bit)
  // are only compiled in when CONFIG_REALTIME_ONLY is 0.
4280 #if !CONFIG_REALTIME_ONLY
4281   VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
4282   VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
4283   VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
4284   VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
4285   VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
4286   VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
4287   VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
4288   VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
4289   VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
4290   VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
4291   VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
4292   VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
4293   VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
4294   VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
4295   VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
4296   VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
4297   VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
4298   VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
4299 #endif  // !CONFIG_REALTIME_ONLY
4300 };
4301 
// Instantiates AvxHBDVarianceTest once per entry of kArrayHBDVariance_sve,
// using "SVE" as the instantiation name prefix.
4302 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
4303                          ::testing::ValuesIn(kArrayHBDVariance_sve));
4304 
4305 #endif  // CONFIG_AV1_HIGHBITDEPTH
4306 #endif  // HAVE_SVE
4307 
4308 }  // namespace
4309