1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <cstdlib>
13 #include <new>
14 #include <ostream>
15 #include <tuple>
16
17 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
18
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
21
22 #include "test/acm_random.h"
23 #include "test/register_state_check.h"
24 #include "aom/aom_codec.h"
25 #include "aom/aom_integer.h"
26 #include "aom_mem/aom_mem.h"
27 #include "aom_ports/aom_timer.h"
28 #include "aom_ports/mem.h"
29 #include "av1/common/cdef_block.h"
30
31 namespace {
32
// Signatures of the kernels exercised by the tests in this file.

// MSE-style kernel over a w x h block: dst holds 8-bit samples and src 16-bit
// samples, each addressed with its own stride.
using MseWxH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                     int sstride, int w, int h);
// Same sample types as MseWxH16bitFunc, but src takes no stride argument.
using Mse16xH16bitFunc = uint64_t (*)(uint8_t *dst, int dstride, uint16_t *src,
                                      int w, int h);
// Block variance of a against b; the SSE is written through |sse|.
using VarianceMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
// SSE / sum / variance kernel with per-block outputs (sse8x8, sum8x8, var8x8)
// and running totals (tot_sse, tot_sum).
using GetSseSum8x8QuadFunc = void (*)(const uint8_t *a, int a_stride,
                                      const uint8_t *b, int b_stride,
                                      uint32_t *sse8x8, int *sum8x8,
                                      unsigned int *tot_sse, int *tot_sum,
                                      uint32_t *var8x8);
// 16x16 counterpart of the kernel above; it has no per-block sum output.
using GetSseSum16x16DualFunc = void (*)(const uint8_t *a, int a_stride,
                                        const uint8_t *b, int b_stride,
                                        uint32_t *sse16x16,
                                        unsigned int *tot_sse, int *tot_sum,
                                        uint32_t *var16x16);
// Variance kernel that additionally takes a sub-pixel (xoffset, yoffset).
using SubpixVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                          int xoffset, int yoffset,
                                          const uint8_t *b, int b_stride,
                                          unsigned int *sse);
// Sub-pixel variance kernel that also receives a second predictor.
using SubpixAvgVarMxNFunc = unsigned int (*)(const uint8_t *a, int a_stride,
                                             int xoffset, int yoffset,
                                             const uint8_t *b, int b_stride,
                                             uint32_t *sse,
                                             const uint8_t *second_pred);
// Sum of squares over a block of 16-bit coefficients.
using SumOfSquaresFunction = unsigned int (*)(const int16_t *src);
60 typedef unsigned int (*DistWtdSubpixAvgVarMxNFunc)(
61 const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b,
62 int b_stride, uint32_t *sse, const uint8_t *second_pred,
63 const DIST_WTD_COMP_PARAMS *jcp_param);
64
#if !CONFIG_REALTIME_ONLY
// OBMC sub-pixel variance kernel: a predictor block with a sub-pixel offset
// evaluated against the 32-bit |wsrc| and |mask| arrays.
using ObmcSubpelVarFunc = uint32_t (*)(const uint8_t *pre, int pre_stride,
                                       int xoffset, int yoffset,
                                       const int32_t *wsrc,
                                       const int32_t *mask, unsigned int *sse);
#endif
71
72 using libaom_test::ACMRandom;
73
74 // Truncate high bit depth results by downshifting (with rounding) by:
75 // 2 * (bit_depth - 8) for sse
76 // (bit_depth - 8) for se
RoundHighBitDepth(int bit_depth,int64_t * se,uint64_t * sse)77 static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
78 switch (bit_depth) {
79 case AOM_BITS_12:
80 *sse = (*sse + 128) >> 8;
81 *se = (*se + 8) >> 4;
82 break;
83 case AOM_BITS_10:
84 *sse = (*sse + 8) >> 4;
85 *se = (*se + 2) >> 2;
86 break;
87 case AOM_BITS_8:
88 default: break;
89 }
90 }
91
// Reference sum of squares over the 256 coefficients starting at |src|.
// The accumulator is an unsigned int, so the total wraps modulo 2^32.
static unsigned int mb_ss_ref(const int16_t *src) {
  unsigned int total = 0;
  const int16_t *const end = src + 256;
  for (const int16_t *coeff = src; coeff != end; ++coeff) {
    total += (*coeff) * (*coeff);
  }
  return total;
}
99
100 /* Note:
101 * Our codebase calculates the "diff" value in the variance algorithm by
102 * (src - ref).
103 */
variance_ref(const uint8_t * src,const uint8_t * ref,int l2w,int l2h,int src_stride,int ref_stride,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)104 static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w,
105 int l2h, int src_stride, int ref_stride,
106 uint32_t *sse_ptr, bool use_high_bit_depth_,
107 aom_bit_depth_t bit_depth) {
108 int64_t se = 0;
109 uint64_t sse = 0;
110 const int w = 1 << l2w;
111 const int h = 1 << l2h;
112 for (int y = 0; y < h; y++) {
113 for (int x = 0; x < w; x++) {
114 int diff;
115 if (!use_high_bit_depth_) {
116 diff = src[y * src_stride + x] - ref[y * ref_stride + x];
117 se += diff;
118 sse += diff * diff;
119 } else {
120 diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] -
121 CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x];
122 se += diff;
123 sse += diff * diff;
124 }
125 }
126 }
127 RoundHighBitDepth(bit_depth, &se, &sse);
128 *sse_ptr = static_cast<uint32_t>(sse);
129 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
130 }
131
132 /* The subpel reference functions differ from the codec version in one aspect:
133 * they calculate the bilinear factors directly instead of using a lookup table
134 * and therefore upshift xoff and yoff by 1. Only every other calculated value
135 * is used so the codec version shrinks the table to save space.
136 */
subpel_variance_ref(const uint8_t * ref,const uint8_t * src,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)137 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
138 int l2w, int l2h, int xoff, int yoff,
139 uint32_t *sse_ptr, bool use_high_bit_depth_,
140 aom_bit_depth_t bit_depth) {
141 int64_t se = 0;
142 uint64_t sse = 0;
143 const int w = 1 << l2w;
144 const int h = 1 << l2h;
145
146 xoff <<= 1;
147 yoff <<= 1;
148
149 for (int y = 0; y < h; y++) {
150 for (int x = 0; x < w; x++) {
151 // Bilinear interpolation at a 16th pel step.
152 if (!use_high_bit_depth_) {
153 const int a1 = ref[(w + 1) * (y + 0) + x + 0];
154 const int a2 = ref[(w + 1) * (y + 0) + x + 1];
155 const int b1 = ref[(w + 1) * (y + 1) + x + 0];
156 const int b2 = ref[(w + 1) * (y + 1) + x + 1];
157 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
158 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
159 const int r = a + (((b - a) * yoff + 8) >> 4);
160 const int diff = r - src[w * y + x];
161 se += diff;
162 sse += diff * diff;
163 } else {
164 uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
165 uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
166 const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
167 const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
168 const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
169 const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
170 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
171 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
172 const int r = a + (((b - a) * yoff + 8) >> 4);
173 const int diff = r - src16[w * y + x];
174 se += diff;
175 sse += diff * diff;
176 }
177 }
178 }
179 RoundHighBitDepth(bit_depth, &se, &sse);
180 *sse_ptr = static_cast<uint32_t>(sse);
181 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
182 }
183
subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth)184 static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src,
185 const uint8_t *second_pred, int l2w,
186 int l2h, int xoff, int yoff,
187 uint32_t *sse_ptr,
188 bool use_high_bit_depth,
189 aom_bit_depth_t bit_depth) {
190 int64_t se = 0;
191 uint64_t sse = 0;
192 const int w = 1 << l2w;
193 const int h = 1 << l2h;
194
195 xoff <<= 1;
196 yoff <<= 1;
197
198 for (int y = 0; y < h; y++) {
199 for (int x = 0; x < w; x++) {
200 // bilinear interpolation at a 16th pel step
201 if (!use_high_bit_depth) {
202 const int a1 = ref[(w + 1) * (y + 0) + x + 0];
203 const int a2 = ref[(w + 1) * (y + 0) + x + 1];
204 const int b1 = ref[(w + 1) * (y + 1) + x + 0];
205 const int b2 = ref[(w + 1) * (y + 1) + x + 1];
206 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
207 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
208 const int r = a + (((b - a) * yoff + 8) >> 4);
209 const int diff =
210 ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
211 se += diff;
212 sse += diff * diff;
213 } else {
214 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
215 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
216 const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
217 const int a1 = ref16[(w + 1) * (y + 0) + x + 0];
218 const int a2 = ref16[(w + 1) * (y + 0) + x + 1];
219 const int b1 = ref16[(w + 1) * (y + 1) + x + 0];
220 const int b2 = ref16[(w + 1) * (y + 1) + x + 1];
221 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
222 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
223 const int r = a + (((b - a) * yoff + 8) >> 4);
224 const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
225 se += diff;
226 sse += diff * diff;
227 }
228 }
229 }
230 RoundHighBitDepth(bit_depth, &se, &sse);
231 *sse_ptr = static_cast<uint32_t>(sse);
232 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
233 }
234
dist_wtd_subpel_avg_variance_ref(const uint8_t * ref,const uint8_t * src,const uint8_t * second_pred,int l2w,int l2h,int xoff,int yoff,uint32_t * sse_ptr,bool use_high_bit_depth,aom_bit_depth_t bit_depth,DIST_WTD_COMP_PARAMS * jcp_param)235 static uint32_t dist_wtd_subpel_avg_variance_ref(
236 const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w,
237 int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth,
238 aom_bit_depth_t bit_depth, DIST_WTD_COMP_PARAMS *jcp_param) {
239 int64_t se = 0;
240 uint64_t sse = 0;
241 const int w = 1 << l2w;
242 const int h = 1 << l2h;
243
244 xoff <<= 1;
245 yoff <<= 1;
246
247 for (int y = 0; y < h; y++) {
248 for (int x = 0; x < w; x++) {
249 // bilinear interpolation at a 16th pel step
250 if (!use_high_bit_depth) {
251 const int a1 = ref[(w + 0) * (y + 0) + x + 0];
252 const int a2 = ref[(w + 0) * (y + 0) + x + 1];
253 const int b1 = ref[(w + 0) * (y + 1) + x + 0];
254 const int b2 = ref[(w + 0) * (y + 1) + x + 1];
255 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
256 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
257 const int r = a + (((b - a) * yoff + 8) >> 4);
258 const int avg = ROUND_POWER_OF_TWO(
259 r * jcp_param->fwd_offset +
260 second_pred[w * y + x] * jcp_param->bck_offset,
261 DIST_PRECISION_BITS);
262 const int diff = avg - src[w * y + x];
263
264 se += diff;
265 sse += diff * diff;
266 } else {
267 const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref);
268 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
269 const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred);
270 const int a1 = ref16[(w + 0) * (y + 0) + x + 0];
271 const int a2 = ref16[(w + 0) * (y + 0) + x + 1];
272 const int b1 = ref16[(w + 0) * (y + 1) + x + 0];
273 const int b2 = ref16[(w + 0) * (y + 1) + x + 1];
274 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
275 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
276 const int r = a + (((b - a) * yoff + 8) >> 4);
277 const int avg =
278 ROUND_POWER_OF_TWO(r * jcp_param->fwd_offset +
279 sec16[w * y + x] * jcp_param->bck_offset,
280 DIST_PRECISION_BITS);
281 const int diff = avg - src16[w * y + x];
282
283 se += diff;
284 sse += diff * diff;
285 }
286 }
287 }
288 RoundHighBitDepth(bit_depth, &se, &sse);
289 *sse_ptr = static_cast<uint32_t>(sse);
290 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
291 }
292
293 #if !CONFIG_REALTIME_ONLY
obmc_subpel_variance_ref(const uint8_t * pre,int l2w,int l2h,int xoff,int yoff,const int32_t * wsrc,const int32_t * mask,uint32_t * sse_ptr,bool use_high_bit_depth_,aom_bit_depth_t bit_depth)294 static uint32_t obmc_subpel_variance_ref(const uint8_t *pre, int l2w, int l2h,
295 int xoff, int yoff,
296 const int32_t *wsrc,
297 const int32_t *mask, uint32_t *sse_ptr,
298 bool use_high_bit_depth_,
299 aom_bit_depth_t bit_depth) {
300 int64_t se = 0;
301 uint64_t sse = 0;
302 const int w = 1 << l2w;
303 const int h = 1 << l2h;
304
305 xoff <<= 1;
306 yoff <<= 1;
307
308 for (int y = 0; y < h; y++) {
309 for (int x = 0; x < w; x++) {
310 // Bilinear interpolation at a 16th pel step.
311 if (!use_high_bit_depth_) {
312 const int a1 = pre[(w + 1) * (y + 0) + x + 0];
313 const int a2 = pre[(w + 1) * (y + 0) + x + 1];
314 const int b1 = pre[(w + 1) * (y + 1) + x + 0];
315 const int b2 = pre[(w + 1) * (y + 1) + x + 1];
316 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
317 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
318 const int r = a + (((b - a) * yoff + 8) >> 4);
319 const int diff = ROUND_POWER_OF_TWO_SIGNED(
320 wsrc[w * y + x] - r * mask[w * y + x], 12);
321 se += diff;
322 sse += diff * diff;
323 } else {
324 uint16_t *pre16 = CONVERT_TO_SHORTPTR(pre);
325 const int a1 = pre16[(w + 1) * (y + 0) + x + 0];
326 const int a2 = pre16[(w + 1) * (y + 0) + x + 1];
327 const int b1 = pre16[(w + 1) * (y + 1) + x + 0];
328 const int b2 = pre16[(w + 1) * (y + 1) + x + 1];
329 const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
330 const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
331 const int r = a + (((b - a) * yoff + 8) >> 4);
332 const int diff = ROUND_POWER_OF_TWO_SIGNED(
333 wsrc[w * y + x] - r * mask[w * y + x], 12);
334 se += diff;
335 sse += diff * diff;
336 }
337 }
338 }
339 RoundHighBitDepth(bit_depth, &se, &sse);
340 *sse_ptr = static_cast<uint32_t>(sse);
341 return static_cast<uint32_t>(sse - ((se * se) >> (l2w + l2h)));
342 }
343 #endif
344
345 ////////////////////////////////////////////////////////////////////////////////
346
347 class SumOfSquaresTest : public ::testing::TestWithParam<SumOfSquaresFunction> {
348 public:
SumOfSquaresTest()349 SumOfSquaresTest() : func_(GetParam()) {}
350
351 ~SumOfSquaresTest() override = default;
352
353 protected:
354 void ConstTest();
355 void RefTest();
356
357 SumOfSquaresFunction func_;
358 ACMRandom rnd_;
359 };
360
ConstTest()361 void SumOfSquaresTest::ConstTest() {
362 int16_t mem[256];
363 unsigned int res;
364 for (int v = 0; v < 256; ++v) {
365 for (int i = 0; i < 256; ++i) {
366 mem[i] = v;
367 }
368 API_REGISTER_STATE_CHECK(res = func_(mem));
369 EXPECT_EQ(256u * (v * v), res);
370 }
371 }
372
RefTest()373 void SumOfSquaresTest::RefTest() {
374 int16_t mem[256];
375 for (int i = 0; i < 100; ++i) {
376 for (int j = 0; j < 256; ++j) {
377 mem[j] = rnd_.Rand8() - rnd_.Rand8();
378 }
379
380 const unsigned int expected = mb_ss_ref(mem);
381 unsigned int res;
382 API_REGISTER_STATE_CHECK(res = func_(mem));
383 EXPECT_EQ(expected, res);
384 }
385 }
386
387 ////////////////////////////////////////////////////////////////////////////////
388 // Encapsulating struct to store the function to test along with
389 // some testing context.
390 // Can be used for MSE, SSE, Variance, etc.
391
392 template <typename Func>
393 struct TestParams {
TestParams__anon1c7945d30111::TestParams394 TestParams(int log2w = 0, int log2h = 0, Func function = nullptr,
395 int bit_depth_value = 0)
396 : log2width(log2w), log2height(log2h), func(function) {
397 use_high_bit_depth = (bit_depth_value > 0);
398 if (use_high_bit_depth) {
399 bit_depth = static_cast<aom_bit_depth_t>(bit_depth_value);
400 } else {
401 bit_depth = AOM_BITS_8;
402 }
403 width = 1 << log2width;
404 height = 1 << log2height;
405 block_size = width * height;
406 mask = (1u << bit_depth) - 1;
407 }
408
409 int log2width, log2height;
410 int width, height;
411 int block_size;
412 Func func;
413 aom_bit_depth_t bit_depth;
414 bool use_high_bit_depth;
415 uint32_t mask;
416 };
417
418 template <typename Func>
operator <<(std::ostream & os,const TestParams<Func> & p)419 std::ostream &operator<<(std::ostream &os, const TestParams<Func> &p) {
420 return os << "width/height:" << p.width << "/" << p.height
421 << " function:" << reinterpret_cast<const void *>(p.func)
422 << " bit-depth:" << p.bit_depth;
423 }
424
425 // Main class for testing a function type
426 template <typename FunctionType>
427 class MseWxHTestClass
428 : public ::testing::TestWithParam<TestParams<FunctionType> > {
429 public:
SetUp()430 void SetUp() override {
431 params_ = this->GetParam();
432
433 rnd_.Reset(ACMRandom::DeterministicSeed());
434 src_ = reinterpret_cast<uint16_t *>(
435 aom_memalign(16, block_size() * sizeof(src_)));
436 dst_ = reinterpret_cast<uint8_t *>(
437 aom_memalign(16, block_size() * sizeof(dst_)));
438 ASSERT_NE(src_, nullptr);
439 ASSERT_NE(dst_, nullptr);
440 }
441
TearDown()442 void TearDown() override {
443 aom_free(src_);
444 aom_free(dst_);
445 src_ = nullptr;
446 dst_ = nullptr;
447 }
448
449 protected:
450 void RefMatchTestMse();
451 void SpeedTest();
452
453 protected:
454 ACMRandom rnd_;
455 uint8_t *dst_;
456 uint16_t *src_;
457 TestParams<FunctionType> params_;
458
459 // some relay helpers
block_size() const460 int block_size() const { return params_.block_size; }
width() const461 int width() const { return params_.width; }
height() const462 int height() const { return params_.height; }
d_stride() const463 int d_stride() const { return params_.width; } // stride is same as width
s_stride() const464 int s_stride() const { return params_.width; } // stride is same as width
465 };
466
467 template <typename MseWxHFunctionType>
SpeedTest()468 void MseWxHTestClass<MseWxHFunctionType>::SpeedTest() {
469 aom_usec_timer ref_timer, test_timer;
470 double elapsed_time_c = 0;
471 double elapsed_time_simd = 0;
472 int run_time = 10000000;
473 int w = width();
474 int h = height();
475 int dstride = d_stride();
476 int sstride = s_stride();
477
478 for (int k = 0; k < block_size(); ++k) {
479 dst_[k] = rnd_.Rand8();
480 src_[k] = rnd_.Rand8();
481 }
482 aom_usec_timer_start(&ref_timer);
483 for (int i = 0; i < run_time; i++) {
484 aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h);
485 }
486 aom_usec_timer_mark(&ref_timer);
487 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
488
489 aom_usec_timer_start(&test_timer);
490 for (int i = 0; i < run_time; i++) {
491 params_.func(dst_, dstride, src_, sstride, w, h);
492 }
493 aom_usec_timer_mark(&test_timer);
494 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
495
496 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
497 elapsed_time_c, elapsed_time_simd,
498 (elapsed_time_c / elapsed_time_simd));
499 }
500
501 template <typename MseWxHFunctionType>
RefMatchTestMse()502 void MseWxHTestClass<MseWxHFunctionType>::RefMatchTestMse() {
503 uint64_t mse_ref = 0;
504 uint64_t mse_mod = 0;
505 int w = width();
506 int h = height();
507 int dstride = d_stride();
508 int sstride = s_stride();
509
510 for (int i = 0; i < 10; i++) {
511 for (int k = 0; k < block_size(); ++k) {
512 dst_[k] = rnd_.Rand8();
513 src_[k] = rnd_.Rand8();
514 }
515 API_REGISTER_STATE_CHECK(
516 mse_ref = aom_mse_wxh_16bit_c(dst_, dstride, src_, sstride, w, h));
517 API_REGISTER_STATE_CHECK(
518 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
519 EXPECT_EQ(mse_ref, mse_mod)
520 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
521 }
522 }
523
524 template <typename FunctionType>
525 class Mse16xHTestClass
526 : public ::testing::TestWithParam<TestParams<FunctionType> > {
527 public:
528 // Memory required to compute mse of two 8x8 and four 4x4 blocks assigned for
529 // maximum width 16 and maximum height 8.
530 int mem_size = 16 * 8;
SetUp()531 void SetUp() override {
532 params_ = this->GetParam();
533 rnd_.Reset(ACMRandom::DeterministicSeed());
534 src_ = reinterpret_cast<uint16_t *>(
535 aom_memalign(16, mem_size * sizeof(*src_)));
536 dst_ =
537 reinterpret_cast<uint8_t *>(aom_memalign(16, mem_size * sizeof(*dst_)));
538 ASSERT_NE(src_, nullptr);
539 ASSERT_NE(dst_, nullptr);
540 }
541
TearDown()542 void TearDown() override {
543 aom_free(src_);
544 aom_free(dst_);
545 src_ = nullptr;
546 dst_ = nullptr;
547 }
548
RandBool()549 uint8_t RandBool() {
550 const uint32_t value = rnd_.Rand8();
551 return (value & 0x1);
552 }
553
554 protected:
555 void RefMatchExtremeTestMse();
556 void RefMatchTestMse();
557 void SpeedTest();
558
559 protected:
560 ACMRandom rnd_;
561 uint8_t *dst_;
562 uint16_t *src_;
563 TestParams<FunctionType> params_;
564
565 // some relay helpers
width() const566 int width() const { return params_.width; }
height() const567 int height() const { return params_.height; }
d_stride() const568 int d_stride() const { return params_.width; }
569 };
570
571 template <typename Mse16xHFunctionType>
SpeedTest()572 void Mse16xHTestClass<Mse16xHFunctionType>::SpeedTest() {
573 aom_usec_timer ref_timer, test_timer;
574 double elapsed_time_c = 0.0;
575 double elapsed_time_simd = 0.0;
576 const int loop_count = 10000000;
577 const int w = width();
578 const int h = height();
579 const int dstride = d_stride();
580
581 for (int k = 0; k < mem_size; ++k) {
582 dst_[k] = rnd_.Rand8();
583 // Right shift by 6 is done to generate more input in range of [0,255] than
584 // CDEF_VERY_LARGE
585 int rnd_i10 = rnd_.Rand16() >> 6;
586 src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
587 }
588
589 aom_usec_timer_start(&ref_timer);
590 for (int i = 0; i < loop_count; i++) {
591 aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h);
592 }
593 aom_usec_timer_mark(&ref_timer);
594 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
595
596 aom_usec_timer_start(&test_timer);
597 for (int i = 0; i < loop_count; i++) {
598 params_.func(dst_, dstride, src_, w, h);
599 }
600 aom_usec_timer_mark(&test_timer);
601 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
602
603 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%.31f\n", width(),
604 height(), elapsed_time_c, elapsed_time_simd,
605 (elapsed_time_c / elapsed_time_simd));
606 }
607
608 template <typename Mse16xHFunctionType>
RefMatchTestMse()609 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchTestMse() {
610 uint64_t mse_ref = 0;
611 uint64_t mse_mod = 0;
612 const int w = width();
613 const int h = height();
614 const int dstride = d_stride();
615
616 for (int i = 0; i < 10; i++) {
617 for (int k = 0; k < mem_size; ++k) {
618 dst_[k] = rnd_.Rand8();
619 // Right shift by 6 is done to generate more input in range of [0,255]
620 // than CDEF_VERY_LARGE
621 int rnd_i10 = rnd_.Rand16() >> 6;
622 src_[k] = (rnd_i10 < 256) ? rnd_i10 : CDEF_VERY_LARGE;
623 }
624
625 API_REGISTER_STATE_CHECK(
626 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
627 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
628 EXPECT_EQ(mse_ref, mse_mod)
629 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
630 }
631 }
632
633 template <typename Mse16xHFunctionType>
RefMatchExtremeTestMse()634 void Mse16xHTestClass<Mse16xHFunctionType>::RefMatchExtremeTestMse() {
635 uint64_t mse_ref = 0;
636 uint64_t mse_mod = 0;
637 const int w = width();
638 const int h = height();
639 const int dstride = d_stride();
640 const int iter = 10;
641
642 // Fill the buffers with extreme values
643 for (int i = 0; i < iter; i++) {
644 for (int k = 0; k < mem_size; ++k) {
645 dst_[k] = static_cast<uint8_t>(RandBool() ? 0 : 255);
646 src_[k] = static_cast<uint16_t>(RandBool() ? 0 : CDEF_VERY_LARGE);
647 }
648
649 API_REGISTER_STATE_CHECK(
650 mse_ref = aom_mse_16xh_16bit_c(dst_, dstride, src_, w, h));
651 API_REGISTER_STATE_CHECK(mse_mod = params_.func(dst_, dstride, src_, w, h));
652 EXPECT_EQ(mse_ref, mse_mod)
653 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
654 }
655 }
656
657 // Main class for testing a function type
658 template <typename FunctionType>
659 class MainTestClass
660 : public ::testing::TestWithParam<TestParams<FunctionType> > {
661 public:
SetUp()662 void SetUp() override {
663 params_ = this->GetParam();
664
665 rnd_.Reset(ACMRandom::DeterministicSeed());
666 const size_t unit =
667 use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t);
668 src_ = reinterpret_cast<uint8_t *>(aom_memalign(16, block_size() * unit));
669 ref_ = new uint8_t[block_size() * unit];
670 ASSERT_NE(src_, nullptr);
671 ASSERT_NE(ref_, nullptr);
672 memset(src_, 0, block_size() * sizeof(src_[0]));
673 memset(ref_, 0, block_size() * sizeof(ref_[0]));
674 if (use_high_bit_depth()) {
675 // TODO(skal): remove!
676 src_ = CONVERT_TO_BYTEPTR(src_);
677 ref_ = CONVERT_TO_BYTEPTR(ref_);
678 }
679 }
680
TearDown()681 void TearDown() override {
682 if (use_high_bit_depth()) {
683 // TODO(skal): remove!
684 src_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(src_));
685 ref_ = reinterpret_cast<uint8_t *>(CONVERT_TO_SHORTPTR(ref_));
686 }
687
688 aom_free(src_);
689 delete[] ref_;
690 src_ = nullptr;
691 ref_ = nullptr;
692 }
693
694 protected:
695 // We could sub-class MainTestClass into dedicated class for Variance
696 // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing
697 // to access top class fields xxx. That's cumbersome, so for now we'll just
698 // implement the testing methods here:
699
700 // Variance tests
701 void ZeroTest();
702 void RefTest();
703 void RefStrideTest();
704 void OneQuarterTest();
705 void SpeedTest();
706
707 // SSE&SUM tests
708 void RefTestSseSum();
709 void MinTestSseSum();
710 void MaxTestSseSum();
711 void SseSum_SpeedTest();
712
713 // SSE&SUM dual tests
714 void RefTestSseSumDual();
715 void MinTestSseSumDual();
716 void MaxTestSseSumDual();
717 void SseSum_SpeedTestDual();
718
719 // MSE/SSE tests
720 void RefTestMse();
721 void RefTestSse();
722 void MaxTestMse();
723 void MaxTestSse();
724
725 protected:
726 ACMRandom rnd_;
727 uint8_t *src_;
728 uint8_t *ref_;
729 TestParams<FunctionType> params_;
730
731 // some relay helpers
use_high_bit_depth() const732 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const733 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const734 int block_size() const { return params_.block_size; }
width() const735 int width() const { return params_.width; }
height() const736 int height() const { return params_.height; }
mask() const737 uint32_t mask() const { return params_.mask; }
738 };
739
740 ////////////////////////////////////////////////////////////////////////////////
741 // Tests related to variance.
742
743 template <typename VarianceFunctionType>
ZeroTest()744 void MainTestClass<VarianceFunctionType>::ZeroTest() {
745 for (int i = 0; i <= 255; ++i) {
746 if (!use_high_bit_depth()) {
747 memset(src_, i, block_size());
748 } else {
749 uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_);
750 for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift();
751 }
752 for (int j = 0; j <= 255; ++j) {
753 if (!use_high_bit_depth()) {
754 memset(ref_, j, block_size());
755 } else {
756 uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_);
757 for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift();
758 }
759 unsigned int sse, var;
760 API_REGISTER_STATE_CHECK(
761 var = params_.func(src_, width(), ref_, width(), &sse));
762 EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j;
763 }
764 }
765 }
766
767 template <typename VarianceFunctionType>
RefTest()768 void MainTestClass<VarianceFunctionType>::RefTest() {
769 for (int i = 0; i < 10; ++i) {
770 for (int j = 0; j < block_size(); j++) {
771 if (!use_high_bit_depth()) {
772 src_[j] = rnd_.Rand8();
773 ref_[j] = rnd_.Rand8();
774 } else {
775 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
776 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
777 }
778 }
779 unsigned int sse1, sse2, var1, var2;
780 const int stride = width();
781 API_REGISTER_STATE_CHECK(
782 var1 = params_.func(src_, stride, ref_, stride, &sse1));
783 var2 =
784 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
785 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
786 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
787 EXPECT_EQ(var1, var2) << "Error at test index: " << i;
788 }
789 }
790
791 template <typename VarianceFunctionType>
RefStrideTest()792 void MainTestClass<VarianceFunctionType>::RefStrideTest() {
793 for (int i = 0; i < 10; ++i) {
794 const int ref_stride = (i & 1) * width();
795 const int src_stride = ((i >> 1) & 1) * width();
796 for (int j = 0; j < block_size(); j++) {
797 const int ref_ind = (j / width()) * ref_stride + j % width();
798 const int src_ind = (j / width()) * src_stride + j % width();
799 if (!use_high_bit_depth()) {
800 src_[src_ind] = rnd_.Rand8();
801 ref_[ref_ind] = rnd_.Rand8();
802 } else {
803 CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask();
804 CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask();
805 }
806 }
807 unsigned int sse1, sse2;
808 unsigned int var1, var2;
809
810 API_REGISTER_STATE_CHECK(
811 var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1));
812 var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height,
813 src_stride, ref_stride, &sse2, use_high_bit_depth(),
814 params_.bit_depth);
815 EXPECT_EQ(sse1, sse2) << "Error at test index: " << i;
816 EXPECT_EQ(var1, var2) << "Error at test index: " << i;
817 }
818 }
819
820 template <typename VarianceFunctionType>
OneQuarterTest()821 void MainTestClass<VarianceFunctionType>::OneQuarterTest() {
822 const int half = block_size() / 2;
823 if (!use_high_bit_depth()) {
824 memset(src_, 255, block_size());
825 memset(ref_, 255, half);
826 memset(ref_ + half, 0, half);
827 } else {
828 aom_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size());
829 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half);
830 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
831 }
832 unsigned int sse, var, expected;
833 API_REGISTER_STATE_CHECK(
834 var = params_.func(src_, width(), ref_, width(), &sse));
835 expected = block_size() * 255 * 255 / 4;
836 EXPECT_EQ(expected, var);
837 }
838
839 template <typename VarianceFunctionType>
SpeedTest()840 void MainTestClass<VarianceFunctionType>::SpeedTest() {
841 for (int j = 0; j < block_size(); j++) {
842 if (!use_high_bit_depth()) {
843 src_[j] = rnd_.Rand8();
844 ref_[j] = rnd_.Rand8();
845 #if CONFIG_AV1_HIGHBITDEPTH
846 } else {
847 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
848 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
849 #endif // CONFIG_AV1_HIGHBITDEPTH
850 }
851 }
852 unsigned int sse;
853 const int stride = width();
854 int run_time = 1000000000 / block_size();
855 aom_usec_timer timer;
856 aom_usec_timer_start(&timer);
857 for (int i = 0; i < run_time; ++i) {
858 params_.func(src_, stride, ref_, stride, &sse);
859 }
860
861 aom_usec_timer_mark(&timer);
862 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
863 printf("Variance %dx%d : %d us\n", width(), height(), elapsed_time);
864 }
865
866 template <typename GetSseSum8x8QuadFuncType>
RefTestSseSum()867 void MainTestClass<GetSseSum8x8QuadFuncType>::RefTestSseSum() {
868 for (int i = 0; i < 10; ++i) {
869 for (int j = 0; j < block_size(); ++j) {
870 src_[j] = rnd_.Rand8();
871 ref_[j] = rnd_.Rand8();
872 }
873 unsigned int sse1[256] = { 0 };
874 unsigned int sse2[256] = { 0 };
875 unsigned int var1[256] = { 0 };
876 unsigned int var2[256] = { 0 };
877 int sum1[256] = { 0 };
878 int sum2[256] = { 0 };
879 unsigned int sse_tot_c = 0;
880 unsigned int sse_tot_simd = 0;
881 int sum_tot_c = 0;
882 int sum_tot_simd = 0;
883 const int stride = width();
884 int k = 0;
885
886 for (int row = 0; row < height(); row += 8) {
887 for (int col = 0; col < width(); col += 32) {
888 API_REGISTER_STATE_CHECK(params_.func(src_ + stride * row + col, stride,
889 ref_ + stride * row + col, stride,
890 &sse1[k], &sum1[k], &sse_tot_simd,
891 &sum_tot_simd, &var1[k]));
892 aom_get_var_sse_sum_8x8_quad_c(
893 src_ + stride * row + col, stride, ref_ + stride * row + col,
894 stride, &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
895 k += 4;
896 }
897 }
898 EXPECT_EQ(sse_tot_c, sse_tot_simd);
899 EXPECT_EQ(sum_tot_c, sum_tot_simd);
900 for (int p = 0; p < 256; p++) {
901 EXPECT_EQ(sse1[p], sse2[p]);
902 EXPECT_EQ(sum1[p], sum2[p]);
903 EXPECT_EQ(var1[p], var2[p]);
904 }
905 }
906 }
907
908 template <typename GetSseSum8x8QuadFuncType>
MinTestSseSum()909 void MainTestClass<GetSseSum8x8QuadFuncType>::MinTestSseSum() {
910 memset(src_, 0, block_size());
911 memset(ref_, 255, block_size());
912 unsigned int sse1[256] = { 0 };
913 unsigned int sse2[256] = { 0 };
914 unsigned int var1[256] = { 0 };
915 unsigned int var2[256] = { 0 };
916 int sum1[256] = { 0 };
917 int sum2[256] = { 0 };
918 unsigned int sse_tot_c = 0;
919 unsigned int sse_tot_simd = 0;
920 int sum_tot_c = 0;
921 int sum_tot_simd = 0;
922 const int stride = width();
923 int k = 0;
924
925 for (int i = 0; i < height(); i += 8) {
926 for (int j = 0; j < width(); j += 32) {
927 API_REGISTER_STATE_CHECK(params_.func(
928 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
929 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
930 aom_get_var_sse_sum_8x8_quad_c(
931 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
932 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
933 k += 4;
934 }
935 }
936 EXPECT_EQ(sse_tot_simd, sse_tot_c);
937 EXPECT_EQ(sum_tot_simd, sum_tot_c);
938 for (int p = 0; p < 256; p++) {
939 EXPECT_EQ(sse1[p], sse2[p]);
940 EXPECT_EQ(sum1[p], sum2[p]);
941 EXPECT_EQ(var1[p], var2[p]);
942 }
943 }
944
945 template <typename GetSseSum8x8QuadFuncType>
MaxTestSseSum()946 void MainTestClass<GetSseSum8x8QuadFuncType>::MaxTestSseSum() {
947 memset(src_, 255, block_size());
948 memset(ref_, 0, block_size());
949 unsigned int sse1[256] = { 0 };
950 unsigned int sse2[256] = { 0 };
951 unsigned int var1[256] = { 0 };
952 unsigned int var2[256] = { 0 };
953 int sum1[256] = { 0 };
954 int sum2[256] = { 0 };
955 unsigned int sse_tot_c = 0;
956 unsigned int sse_tot_simd = 0;
957 int sum_tot_c = 0;
958 int sum_tot_simd = 0;
959 const int stride = width();
960 int k = 0;
961
962 for (int i = 0; i < height(); i += 8) {
963 for (int j = 0; j < width(); j += 32) {
964 API_REGISTER_STATE_CHECK(params_.func(
965 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
966 &sse1[k], &sum1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
967 aom_get_var_sse_sum_8x8_quad_c(
968 src_ + stride * i + j, stride, ref_ + stride * i + j, stride,
969 &sse2[k], &sum2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
970 k += 4;
971 }
972 }
973 EXPECT_EQ(sse_tot_c, sse_tot_simd);
974 EXPECT_EQ(sum_tot_c, sum_tot_simd);
975
976 for (int p = 0; p < 256; p++) {
977 EXPECT_EQ(sse1[p], sse2[p]);
978 EXPECT_EQ(sum1[p], sum2[p]);
979 EXPECT_EQ(var1[p], var2[p]);
980 }
981 }
982
983 template <typename GetSseSum8x8QuadFuncType>
SseSum_SpeedTest()984 void MainTestClass<GetSseSum8x8QuadFuncType>::SseSum_SpeedTest() {
985 const int loop_count = 1000000000 / block_size();
986 for (int j = 0; j < block_size(); ++j) {
987 src_[j] = rnd_.Rand8();
988 ref_[j] = rnd_.Rand8();
989 }
990
991 unsigned int sse1[4] = { 0 };
992 unsigned int sse2[4] = { 0 };
993 unsigned int var1[4] = { 0 };
994 unsigned int var2[4] = { 0 };
995 int sum1[4] = { 0 };
996 int sum2[4] = { 0 };
997 unsigned int sse_tot_c = 0;
998 unsigned int sse_tot_simd = 0;
999 int sum_tot_c = 0;
1000 int sum_tot_simd = 0;
1001 const int stride = width();
1002
1003 aom_usec_timer timer;
1004 aom_usec_timer_start(&timer);
1005 for (int r = 0; r < loop_count; ++r) {
1006 for (int i = 0; i < height(); i += 8) {
1007 for (int j = 0; j < width(); j += 32) {
1008 aom_get_var_sse_sum_8x8_quad_c(src_ + stride * i + j, stride,
1009 ref_ + stride * i + j, stride, sse2,
1010 sum2, &sse_tot_c, &sum_tot_c, var2);
1011 }
1012 }
1013 }
1014 aom_usec_timer_mark(&timer);
1015 const double elapsed_time_ref =
1016 static_cast<double>(aom_usec_timer_elapsed(&timer));
1017
1018 aom_usec_timer_start(&timer);
1019 for (int r = 0; r < loop_count; ++r) {
1020 for (int i = 0; i < height(); i += 8) {
1021 for (int j = 0; j < width(); j += 32) {
1022 params_.func(src_ + stride * i + j, stride, ref_ + stride * i + j,
1023 stride, sse1, sum1, &sse_tot_simd, &sum_tot_simd, var1);
1024 }
1025 }
1026 }
1027 aom_usec_timer_mark(&timer);
1028 const double elapsed_time_simd =
1029 static_cast<double>(aom_usec_timer_elapsed(&timer));
1030
1031 printf(
1032 "aom_getvar_8x8_quad for block=%dx%d : ref_time=%lf \t simd_time=%lf \t "
1033 "gain=%lf \n",
1034 width(), height(), elapsed_time_ref, elapsed_time_simd,
1035 elapsed_time_ref / elapsed_time_simd);
1036 }
1037
1038 template <typename GetSseSum16x16DualFuncType>
RefTestSseSumDual()1039 void MainTestClass<GetSseSum16x16DualFuncType>::RefTestSseSumDual() {
1040 for (int iter = 0; iter < 10; ++iter) {
1041 for (int idx = 0; idx < block_size(); ++idx) {
1042 src_[idx] = rnd_.Rand8();
1043 ref_[idx] = rnd_.Rand8();
1044 }
1045 unsigned int sse1[64] = { 0 };
1046 unsigned int sse2[64] = { 0 };
1047 unsigned int var1[64] = { 0 };
1048 unsigned int var2[64] = { 0 };
1049 unsigned int sse_tot_c = 0;
1050 unsigned int sse_tot_simd = 0;
1051 int sum_tot_c = 0;
1052 int sum_tot_simd = 0;
1053 const int stride = width();
1054 int k = 0;
1055
1056 for (int row = 0; row < height(); row += 16) {
1057 for (int col = 0; col < width(); col += 32) {
1058 API_REGISTER_STATE_CHECK(params_.func(
1059 src_ + stride * row + col, stride, ref_ + stride * row + col,
1060 stride, &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1061 aom_get_var_sse_sum_16x16_dual_c(
1062 src_ + stride * row + col, stride, ref_ + stride * row + col,
1063 stride, &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1064 k += 2;
1065 }
1066 }
1067 EXPECT_EQ(sse_tot_c, sse_tot_simd);
1068 EXPECT_EQ(sum_tot_c, sum_tot_simd);
1069 for (int p = 0; p < 64; p++) {
1070 EXPECT_EQ(sse1[p], sse2[p]);
1071 EXPECT_EQ(sse_tot_simd, sse_tot_c);
1072 EXPECT_EQ(sum_tot_simd, sum_tot_c);
1073 EXPECT_EQ(var1[p], var2[p]);
1074 }
1075 }
1076 }
1077
1078 template <typename GetSseSum16x16DualFuncType>
MinTestSseSumDual()1079 void MainTestClass<GetSseSum16x16DualFuncType>::MinTestSseSumDual() {
1080 memset(src_, 0, block_size());
1081 memset(ref_, 255, block_size());
1082 unsigned int sse1[64] = { 0 };
1083 unsigned int sse2[64] = { 0 };
1084 unsigned int var1[64] = { 0 };
1085 unsigned int var2[64] = { 0 };
1086 unsigned int sse_tot_c = 0;
1087 unsigned int sse_tot_simd = 0;
1088 int sum_tot_c = 0;
1089 int sum_tot_simd = 0;
1090 const int stride = width();
1091 int k = 0;
1092
1093 for (int row = 0; row < height(); row += 16) {
1094 for (int col = 0; col < width(); col += 32) {
1095 API_REGISTER_STATE_CHECK(params_.func(
1096 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1097 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1098 aom_get_var_sse_sum_16x16_dual_c(
1099 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1100 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1101 k += 2;
1102 }
1103 }
1104 EXPECT_EQ(sse_tot_simd, sse_tot_c);
1105 EXPECT_EQ(sum_tot_simd, sum_tot_c);
1106 for (int p = 0; p < 64; p++) {
1107 EXPECT_EQ(sse1[p], sse2[p]);
1108 EXPECT_EQ(var1[p], var2[p]);
1109 }
1110 }
1111
1112 template <typename GetSseSum16x16DualFuncType>
MaxTestSseSumDual()1113 void MainTestClass<GetSseSum16x16DualFuncType>::MaxTestSseSumDual() {
1114 memset(src_, 255, block_size());
1115 memset(ref_, 0, block_size());
1116 unsigned int sse1[64] = { 0 };
1117 unsigned int sse2[64] = { 0 };
1118 unsigned int var1[64] = { 0 };
1119 unsigned int var2[64] = { 0 };
1120 unsigned int sse_tot_c = 0;
1121 unsigned int sse_tot_simd = 0;
1122 int sum_tot_c = 0;
1123 int sum_tot_simd = 0;
1124 const int stride = width();
1125 int k = 0;
1126
1127 for (int row = 0; row < height(); row += 16) {
1128 for (int col = 0; col < width(); col += 32) {
1129 API_REGISTER_STATE_CHECK(params_.func(
1130 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1131 &sse1[k], &sse_tot_simd, &sum_tot_simd, &var1[k]));
1132 aom_get_var_sse_sum_16x16_dual_c(
1133 src_ + stride * row + col, stride, ref_ + stride * row + col, stride,
1134 &sse2[k], &sse_tot_c, &sum_tot_c, &var2[k]);
1135 k += 2;
1136 }
1137 }
1138 EXPECT_EQ(sse_tot_c, sse_tot_simd);
1139 EXPECT_EQ(sum_tot_c, sum_tot_simd);
1140
1141 for (int p = 0; p < 64; p++) {
1142 EXPECT_EQ(sse1[p], sse2[p]);
1143 EXPECT_EQ(var1[p], var2[p]);
1144 }
1145 }
1146
1147 template <typename GetSseSum16x16DualFuncType>
SseSum_SpeedTestDual()1148 void MainTestClass<GetSseSum16x16DualFuncType>::SseSum_SpeedTestDual() {
1149 const int loop_count = 1000000000 / block_size();
1150 for (int idx = 0; idx < block_size(); ++idx) {
1151 src_[idx] = rnd_.Rand8();
1152 ref_[idx] = rnd_.Rand8();
1153 }
1154
1155 unsigned int sse1[2] = { 0 };
1156 unsigned int sse2[2] = { 0 };
1157 unsigned int var1[2] = { 0 };
1158 unsigned int var2[2] = { 0 };
1159 unsigned int sse_tot_c = 0;
1160 unsigned int sse_tot_simd = 0;
1161 int sum_tot_c = 0;
1162 int sum_tot_simd = 0;
1163 const int stride = width();
1164
1165 aom_usec_timer timer;
1166 aom_usec_timer_start(&timer);
1167 for (int r = 0; r < loop_count; ++r) {
1168 for (int row = 0; row < height(); row += 16) {
1169 for (int col = 0; col < width(); col += 32) {
1170 aom_get_var_sse_sum_16x16_dual_c(src_ + stride * row + col, stride,
1171 ref_ + stride * row + col, stride,
1172 sse2, &sse_tot_c, &sum_tot_c, var2);
1173 }
1174 }
1175 }
1176 aom_usec_timer_mark(&timer);
1177 const double elapsed_time_ref =
1178 static_cast<double>(aom_usec_timer_elapsed(&timer));
1179
1180 aom_usec_timer_start(&timer);
1181 for (int r = 0; r < loop_count; ++r) {
1182 for (int row = 0; row < height(); row += 16) {
1183 for (int col = 0; col < width(); col += 32) {
1184 params_.func(src_ + stride * row + col, stride,
1185 ref_ + stride * row + col, stride, sse1, &sse_tot_simd,
1186 &sum_tot_simd, var1);
1187 }
1188 }
1189 }
1190 aom_usec_timer_mark(&timer);
1191 const double elapsed_time_simd =
1192 static_cast<double>(aom_usec_timer_elapsed(&timer));
1193
1194 printf(
1195 "aom_getvar_16x16_dual for block=%dx%d : ref_time=%lf \t simd_time=%lf "
1196 "\t "
1197 "gain=%lf \n",
1198 width(), height(), elapsed_time_ref, elapsed_time_simd,
1199 elapsed_time_ref / elapsed_time_simd);
1200 }
1201
1202 ////////////////////////////////////////////////////////////////////////////////
1203 // Tests related to MSE / SSE.
1204
1205 template <typename FunctionType>
RefTestMse()1206 void MainTestClass<FunctionType>::RefTestMse() {
1207 for (int i = 0; i < 10; ++i) {
1208 for (int j = 0; j < block_size(); ++j) {
1209 if (!use_high_bit_depth()) {
1210 src_[j] = rnd_.Rand8();
1211 ref_[j] = rnd_.Rand8();
1212 #if CONFIG_AV1_HIGHBITDEPTH
1213 } else {
1214 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1215 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1216 #endif // CONFIG_AV1_HIGHBITDEPTH
1217 }
1218 }
1219 unsigned int sse1, sse2;
1220 const int stride = width();
1221 API_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1));
1222 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1223 stride, &sse2, use_high_bit_depth(), params_.bit_depth);
1224 EXPECT_EQ(sse1, sse2);
1225 }
1226 }
1227
1228 template <typename FunctionType>
RefTestSse()1229 void MainTestClass<FunctionType>::RefTestSse() {
1230 for (int i = 0; i < 10; ++i) {
1231 for (int j = 0; j < block_size(); ++j) {
1232 src_[j] = rnd_.Rand8();
1233 ref_[j] = rnd_.Rand8();
1234 }
1235 unsigned int sse2;
1236 unsigned int var1;
1237 const int stride = width();
1238 API_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride));
1239 variance_ref(src_, ref_, params_.log2width, params_.log2height, stride,
1240 stride, &sse2, false, AOM_BITS_8);
1241 EXPECT_EQ(var1, sse2);
1242 }
1243 }
1244
1245 template <typename FunctionType>
MaxTestMse()1246 void MainTestClass<FunctionType>::MaxTestMse() {
1247 int max_value = (1 << params_.bit_depth) - 1;
1248 if (!use_high_bit_depth()) {
1249 memset(src_, max_value, block_size());
1250 memset(ref_, 0, block_size());
1251 #if CONFIG_AV1_HIGHBITDEPTH
1252 } else {
1253 aom_memset16(CONVERT_TO_SHORTPTR(src_), max_value, block_size());
1254 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, block_size());
1255 #endif // CONFIG_AV1_HIGHBITDEPTH
1256 }
1257 unsigned int sse;
1258 API_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse));
1259 unsigned int expected = (unsigned int)block_size() * max_value * max_value;
1260 switch (params_.bit_depth) {
1261 case AOM_BITS_12: expected = ROUND_POWER_OF_TWO(expected, 8); break;
1262 case AOM_BITS_10: expected = ROUND_POWER_OF_TWO(expected, 4); break;
1263 case AOM_BITS_8:
1264 default: break;
1265 }
1266 EXPECT_EQ(expected, sse);
1267 }
1268
1269 template <typename FunctionType>
MaxTestSse()1270 void MainTestClass<FunctionType>::MaxTestSse() {
1271 memset(src_, 255, block_size());
1272 memset(ref_, 0, block_size());
1273 unsigned int var;
1274 API_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width()));
1275 const unsigned int expected = block_size() * 255 * 255;
1276 EXPECT_EQ(expected, var);
1277 }
1278
1279 ////////////////////////////////////////////////////////////////////////////////
1280
1281 using std::get;
1282 using std::make_tuple;
1283 using std::tuple;
1284
1285 template <typename FunctionType>
1286 class SubpelVarianceTest
1287 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1288 public:
SetUp()1289 void SetUp() override {
1290 params_ = this->GetParam();
1291
1292 rnd_.Reset(ACMRandom::DeterministicSeed());
1293 if (!use_high_bit_depth()) {
1294 src_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1295 sec_ = reinterpret_cast<uint8_t *>(aom_memalign(32, block_size()));
1296 ref_ = reinterpret_cast<uint8_t *>(
1297 aom_memalign(32, block_size() + width() + height() + 1));
1298 } else {
1299 src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1300 aom_memalign(32, block_size() * sizeof(uint16_t))));
1301 sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(
1302 aom_memalign(32, block_size() * sizeof(uint16_t))));
1303 ref_ = CONVERT_TO_BYTEPTR(aom_memalign(
1304 32, (block_size() + width() + height() + 1) * sizeof(uint16_t)));
1305 }
1306 ASSERT_NE(src_, nullptr);
1307 ASSERT_NE(sec_, nullptr);
1308 ASSERT_NE(ref_, nullptr);
1309 }
1310
TearDown()1311 void TearDown() override {
1312 if (!use_high_bit_depth()) {
1313 aom_free(src_);
1314 aom_free(ref_);
1315 aom_free(sec_);
1316 } else {
1317 aom_free(CONVERT_TO_SHORTPTR(src_));
1318 aom_free(CONVERT_TO_SHORTPTR(ref_));
1319 aom_free(CONVERT_TO_SHORTPTR(sec_));
1320 }
1321 }
1322
1323 protected:
1324 void RefTest();
1325 void ExtremeRefTest();
1326 void SpeedTest();
1327
1328 ACMRandom rnd_;
1329 uint8_t *src_;
1330 uint8_t *ref_;
1331 uint8_t *sec_;
1332 TestParams<FunctionType> params_;
1333 DIST_WTD_COMP_PARAMS jcp_param_;
1334
1335 // some relay helpers
use_high_bit_depth() const1336 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1337 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1338 int block_size() const { return params_.block_size; }
width() const1339 int width() const { return params_.width; }
height() const1340 int height() const { return params_.height; }
mask() const1341 uint32_t mask() const { return params_.mask; }
1342 };
1343
1344 template <typename SubpelVarianceFunctionType>
RefTest()1345 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
1346 for (int x = 0; x < 8; ++x) {
1347 for (int y = 0; y < 8; ++y) {
1348 if (!use_high_bit_depth()) {
1349 for (int j = 0; j < block_size(); j++) {
1350 src_[j] = rnd_.Rand8();
1351 }
1352 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1353 ref_[j] = rnd_.Rand8();
1354 }
1355 } else {
1356 for (int j = 0; j < block_size(); j++) {
1357 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1358 }
1359 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1360 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1361 }
1362 }
1363 unsigned int sse1, sse2;
1364 unsigned int var1;
1365 API_REGISTER_STATE_CHECK(
1366 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1367 const unsigned int var2 = subpel_variance_ref(
1368 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1369 use_high_bit_depth(), params_.bit_depth);
1370 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1371 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1372 }
1373 }
1374 }
1375
1376 template <typename SubpelVarianceFunctionType>
ExtremeRefTest()1377 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
1378 // Compare against reference.
1379 // Src: Set the first half of values to 0, the second half to the maximum.
1380 // Ref: Set the first half of values to the maximum, the second half to 0.
1381 for (int x = 0; x < 8; ++x) {
1382 for (int y = 0; y < 8; ++y) {
1383 const int half = block_size() / 2;
1384 if (!use_high_bit_depth()) {
1385 memset(src_, 0, half);
1386 memset(src_ + half, 255, half);
1387 memset(ref_, 255, half);
1388 memset(ref_ + half, 0, half + width() + height() + 1);
1389 } else {
1390 aom_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half);
1391 aom_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
1392 aom_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
1393 aom_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(),
1394 half + width() + height() + 1);
1395 }
1396 unsigned int sse1, sse2;
1397 unsigned int var1;
1398 API_REGISTER_STATE_CHECK(
1399 var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1));
1400 const unsigned int var2 = subpel_variance_ref(
1401 ref_, src_, params_.log2width, params_.log2height, x, y, &sse2,
1402 use_high_bit_depth(), params_.bit_depth);
1403 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1404 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1405 }
1406 }
1407 }
1408
1409 template <typename SubpelVarianceFunctionType>
SpeedTest()1410 void SubpelVarianceTest<SubpelVarianceFunctionType>::SpeedTest() {
1411 if (!use_high_bit_depth()) {
1412 for (int j = 0; j < block_size(); j++) {
1413 src_[j] = rnd_.Rand8();
1414 }
1415 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1416 ref_[j] = rnd_.Rand8();
1417 }
1418 } else {
1419 for (int j = 0; j < block_size(); j++) {
1420 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1421 }
1422 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1423 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1424 }
1425 }
1426
1427 unsigned int sse1, sse2;
1428 int run_time = 1000000000 / block_size();
1429 aom_usec_timer timer;
1430
1431 aom_usec_timer_start(&timer);
1432 for (int i = 0; i < run_time; ++i) {
1433 int x = rnd_(8);
1434 int y = rnd_(8);
1435 params_.func(ref_, width() + 1, x, y, src_, width(), &sse1);
1436 }
1437 aom_usec_timer_mark(&timer);
1438
1439 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1440
1441 aom_usec_timer timer_c;
1442
1443 aom_usec_timer_start(&timer_c);
1444 for (int i = 0; i < run_time; ++i) {
1445 int x = rnd_(8);
1446 int y = rnd_(8);
1447 subpel_variance_ref(ref_, src_, params_.log2width, params_.log2height, x, y,
1448 &sse2, use_high_bit_depth(), params_.bit_depth);
1449 }
1450 aom_usec_timer_mark(&timer_c);
1451
1452 const int elapsed_time_c = static_cast<int>(aom_usec_timer_elapsed(&timer_c));
1453
1454 printf(
1455 "sub_pixel_variance_%dx%d_%d: ref_time=%d us opt_time=%d us gain=%d \n",
1456 width(), height(), params_.bit_depth, elapsed_time_c, elapsed_time,
1457 elapsed_time_c / elapsed_time);
1458 }
1459
1460 template <>
RefTest()1461 void SubpelVarianceTest<SubpixAvgVarMxNFunc>::RefTest() {
1462 for (int x = 0; x < 8; ++x) {
1463 for (int y = 0; y < 8; ++y) {
1464 if (!use_high_bit_depth()) {
1465 for (int j = 0; j < block_size(); j++) {
1466 src_[j] = rnd_.Rand8();
1467 sec_[j] = rnd_.Rand8();
1468 }
1469 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1470 ref_[j] = rnd_.Rand8();
1471 }
1472 } else {
1473 for (int j = 0; j < block_size(); j++) {
1474 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1475 CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1476 }
1477 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1478 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1479 }
1480 }
1481 uint32_t sse1, sse2;
1482 uint32_t var1, var2;
1483 API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y,
1484 src_, width(), &sse1, sec_));
1485 var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width,
1486 params_.log2height, x, y, &sse2,
1487 use_high_bit_depth(), params_.bit_depth);
1488 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1489 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1490 }
1491 }
1492 }
1493
1494 template <>
RefTest()1495 void SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>::RefTest() {
1496 for (int x = 0; x < 8; ++x) {
1497 for (int y = 0; y < 8; ++y) {
1498 if (!use_high_bit_depth()) {
1499 for (int j = 0; j < block_size(); j++) {
1500 src_[j] = rnd_.Rand8();
1501 sec_[j] = rnd_.Rand8();
1502 }
1503 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1504 ref_[j] = rnd_.Rand8();
1505 }
1506 } else {
1507 for (int j = 0; j < block_size(); j++) {
1508 CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask();
1509 CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask();
1510 }
1511 for (int j = 0; j < block_size() + width() + height() + 1; j++) {
1512 CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask();
1513 }
1514 }
1515 for (int x0 = 0; x0 < 2; ++x0) {
1516 for (int y0 = 0; y0 < 4; ++y0) {
1517 uint32_t sse1, sse2;
1518 uint32_t var1, var2;
1519 jcp_param_.fwd_offset = quant_dist_lookup_table[y0][x0];
1520 jcp_param_.bck_offset = quant_dist_lookup_table[y0][1 - x0];
1521 API_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 0, x, y,
1522 src_, width(), &sse1,
1523 sec_, &jcp_param_));
1524 var2 = dist_wtd_subpel_avg_variance_ref(
1525 ref_, src_, sec_, params_.log2width, params_.log2height, x, y,
1526 &sse2, use_high_bit_depth(), params_.bit_depth, &jcp_param_);
1527 EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
1528 EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
1529 }
1530 }
1531 }
1532 }
1533 }
1534
1535 ////////////////////////////////////////////////////////////////////////////////
1536
1537 #if !CONFIG_REALTIME_ONLY
1538
1539 static const int kMaskMax = 64;
1540
1541 typedef TestParams<ObmcSubpelVarFunc> ObmcSubpelVarianceParams;
1542
1543 template <typename FunctionType>
1544 class ObmcVarianceTest
1545 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1546 public:
SetUp()1547 void SetUp() override {
1548 params_ = this->GetParam();
1549
1550 rnd_.Reset(ACMRandom::DeterministicSeed());
1551 if (!use_high_bit_depth()) {
1552 pre_ = reinterpret_cast<uint8_t *>(
1553 aom_memalign(32, block_size() + width() + height() + 1));
1554 } else {
1555 pre_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(aom_memalign(
1556 32, (block_size() + width() + height() + 1) * sizeof(uint16_t))));
1557 }
1558 wsrc_ = reinterpret_cast<int32_t *>(
1559 aom_memalign(32, block_size() * sizeof(uint32_t)));
1560 mask_ = reinterpret_cast<int32_t *>(
1561 aom_memalign(32, block_size() * sizeof(uint32_t)));
1562 ASSERT_NE(pre_, nullptr);
1563 ASSERT_NE(wsrc_, nullptr);
1564 ASSERT_NE(mask_, nullptr);
1565 }
1566
TearDown()1567 void TearDown() override {
1568 if (!use_high_bit_depth()) {
1569 aom_free(pre_);
1570 } else {
1571 aom_free(CONVERT_TO_SHORTPTR(pre_));
1572 }
1573 aom_free(wsrc_);
1574 aom_free(mask_);
1575 }
1576
1577 protected:
1578 void RefTest();
1579 void ExtremeRefTest();
1580 void SpeedTest();
1581
1582 ACMRandom rnd_;
1583 uint8_t *pre_;
1584 int32_t *wsrc_;
1585 int32_t *mask_;
1586 TestParams<FunctionType> params_;
1587
1588 // some relay helpers
use_high_bit_depth() const1589 bool use_high_bit_depth() const { return params_.use_high_bit_depth; }
byte_shift() const1590 int byte_shift() const { return params_.bit_depth - 8; }
block_size() const1591 int block_size() const { return params_.block_size; }
width() const1592 int width() const { return params_.width; }
height() const1593 int height() const { return params_.height; }
bd_mask() const1594 uint32_t bd_mask() const { return params_.mask; }
1595 };
1596
1597 template <>
RefTest()1598 void ObmcVarianceTest<ObmcSubpelVarFunc>::RefTest() {
1599 for (int x = 0; x < 8; ++x) {
1600 for (int y = 0; y < 8; ++y) {
1601 if (!use_high_bit_depth())
1602 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1603 pre_[j] = rnd_.Rand8();
1604 else
1605 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1606 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1607 for (int j = 0; j < block_size(); j++) {
1608 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1609 mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1610 }
1611
1612 uint32_t sse1, sse2;
1613 uint32_t var1, var2;
1614 API_REGISTER_STATE_CHECK(
1615 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1616 var2 = obmc_subpel_variance_ref(
1617 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1618 &sse2, use_high_bit_depth(), params_.bit_depth);
1619 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1620 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1621 }
1622 }
1623 }
1624
1625 template <>
ExtremeRefTest()1626 void ObmcVarianceTest<ObmcSubpelVarFunc>::ExtremeRefTest() {
1627 // Pre: Set the first half of values to the maximum, the second half to 0.
1628 // Mask: same as above
1629 // WSrc: Set the first half of values to 0, the second half to the maximum.
1630 for (int x = 0; x < 8; ++x) {
1631 for (int y = 0; y < 8; ++y) {
1632 const int half = block_size() / 2;
1633 if (!use_high_bit_depth()) {
1634 memset(pre_, 255, half);
1635 memset(pre_ + half, 0, half + width() + height() + 1);
1636 } else {
1637 aom_memset16(CONVERT_TO_SHORTPTR(pre_), bd_mask(), half);
1638 aom_memset16(CONVERT_TO_SHORTPTR(pre_) + half, 0,
1639 half + width() + height() + 1);
1640 }
1641 for (int j = 0; j < half; j++) {
1642 wsrc_[j] = bd_mask() * kMaskMax * kMaskMax;
1643 mask_[j] = 0;
1644 }
1645 for (int j = half; j < block_size(); j++) {
1646 wsrc_[j] = 0;
1647 mask_[j] = kMaskMax * kMaskMax;
1648 }
1649
1650 uint32_t sse1, sse2;
1651 uint32_t var1, var2;
1652 API_REGISTER_STATE_CHECK(
1653 var1 = params_.func(pre_, width() + 1, x, y, wsrc_, mask_, &sse1));
1654 var2 = obmc_subpel_variance_ref(
1655 pre_, params_.log2width, params_.log2height, x, y, wsrc_, mask_,
1656 &sse2, use_high_bit_depth(), params_.bit_depth);
1657 EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
1658 EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
1659 }
1660 }
1661 }
1662
1663 template <>
SpeedTest()1664 void ObmcVarianceTest<ObmcSubpelVarFunc>::SpeedTest() {
1665 if (!use_high_bit_depth())
1666 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1667 pre_[j] = rnd_.Rand8();
1668 else
1669 for (int j = 0; j < block_size() + width() + height() + 1; j++)
1670 CONVERT_TO_SHORTPTR(pre_)[j] = rnd_.Rand16() & bd_mask();
1671 for (int j = 0; j < block_size(); j++) {
1672 wsrc_[j] = (rnd_.Rand16() & bd_mask()) * rnd_(kMaskMax * kMaskMax + 1);
1673 mask_[j] = rnd_(kMaskMax * kMaskMax + 1);
1674 }
1675 unsigned int sse1;
1676 const int stride = width() + 1;
1677 int run_time = 1000000000 / block_size();
1678 aom_usec_timer timer;
1679
1680 aom_usec_timer_start(&timer);
1681 for (int i = 0; i < run_time; ++i) {
1682 int x = rnd_(8);
1683 int y = rnd_(8);
1684 API_REGISTER_STATE_CHECK(
1685 params_.func(pre_, stride, x, y, wsrc_, mask_, &sse1));
1686 }
1687 aom_usec_timer_mark(&timer);
1688
1689 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
1690 printf("obmc_sub_pixel_variance_%dx%d_%d: %d us\n", width(), height(),
1691 params_.bit_depth, elapsed_time);
1692 }
1693
1694 #endif // !CONFIG_REALTIME_ONLY
1695
1696 typedef MseWxHTestClass<MseWxH16bitFunc> MseWxHTest;
1697 typedef Mse16xHTestClass<Mse16xH16bitFunc> Mse16xHTest;
1698 typedef MainTestClass<VarianceMxNFunc> AvxMseTest;
1699 typedef MainTestClass<VarianceMxNFunc> AvxVarianceTest;
1700 typedef MainTestClass<GetSseSum8x8QuadFunc> GetSseSum8x8QuadTest;
1701 typedef MainTestClass<GetSseSum16x16DualFunc> GetSseSum16x16DualTest;
1702 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxSubpelVarianceTest;
1703 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxSubpelAvgVarianceTest;
1704 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
1705 AvxDistWtdSubpelAvgVarianceTest;
1706 #if !CONFIG_REALTIME_ONLY
1707 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxObmcSubpelVarianceTest;
1708 #endif
1709 typedef TestParams<MseWxH16bitFunc> MseWxHParams;
1710 typedef TestParams<Mse16xH16bitFunc> Mse16xHParams;
1711
// Test registrations. Each TEST_P forwards to one fixture method; tests whose
// name starts with DISABLED_ are benchmarks and are skipped unless explicitly
// enabled on the command line.

// 16-bit MSE kernels.
TEST_P(MseWxHTest, RefMse) { RefMatchTestMse(); }
TEST_P(MseWxHTest, DISABLED_SpeedMse) { SpeedTest(); }
TEST_P(Mse16xHTest, RefMse) { RefMatchTestMse(); }
TEST_P(Mse16xHTest, RefMseExtreme) { RefMatchExtremeTestMse(); }
TEST_P(Mse16xHTest, DISABLED_SpeedMse) { SpeedTest(); }

// MSE and variance kernels sharing the VarianceMxNFunc signature.
TEST_P(AvxMseTest, RefMse) { RefTestMse(); }
TEST_P(AvxMseTest, MaxMse) { MaxTestMse(); }
TEST_P(AvxVarianceTest, Zero) { ZeroTest(); }
TEST_P(AvxVarianceTest, Ref) { RefTest(); }
TEST_P(AvxVarianceTest, RefStride) { RefStrideTest(); }
TEST_P(AvxVarianceTest, OneQuarter) { OneQuarterTest(); }
TEST_P(AvxVarianceTest, DISABLED_Speed) { SpeedTest(); }

// Multi-block SSE/sum/variance kernels (four 8x8 or two 16x16 results per
// call).
TEST_P(GetSseSum8x8QuadTest, RefMseSum) { RefTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest, MinSseSum) { MinTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest, MaxMseSum) { MaxTestSseSum(); }
TEST_P(GetSseSum8x8QuadTest, DISABLED_Speed) { SseSum_SpeedTest(); }
TEST_P(GetSseSum16x16DualTest, RefMseSum) { RefTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest, MinSseSum) { MinTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest, MaxMseSum) { MaxTestSseSumDual(); }
TEST_P(GetSseSum16x16DualTest, DISABLED_Speed) { SseSum_SpeedTestDual(); }

// Sum of squares.
TEST_P(SumOfSquaresTest, Const) { ConstTest(); }
TEST_P(SumOfSquaresTest, Ref) { RefTest(); }

// Sub-pixel variance kernels (plain, averaging, distance-weighted averaging).
TEST_P(AvxSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
TEST_P(AvxSubpelAvgVarianceTest, Ref) { RefTest(); }
TEST_P(AvxDistWtdSubpelAvgVarianceTest, Ref) { RefTest(); }

// OBMC sub-pixel variance kernels; not built in realtime-only configurations.
#if !CONFIG_REALTIME_ONLY
TEST_P(AvxObmcSubpelVarianceTest, Ref) { RefTest(); }
TEST_P(AvxObmcSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
TEST_P(AvxObmcSubpelVarianceTest, DISABLED_Speed) { SpeedTest(); }
#endif
1744
// Instantiations for the plain C implementations.
//
// NOTE(review): judging by the kernel names paired with each entry (e.g.
// (7, 6) with aom_variance128x64_c), the first two TestParams arguments appear
// to be log2(width) and log2(height); the trailing 8 on the MSE parameter sets
// is presumably the bit depth — confirm against the TestParams constructor.
INSTANTIATE_TEST_SUITE_P(
    C, MseWxHTest,
    ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_c, 8),
                      MseWxHParams(3, 2, &aom_mse_wxh_16bit_c, 8),
                      MseWxHParams(2, 3, &aom_mse_wxh_16bit_c, 8),
                      MseWxHParams(2, 2, &aom_mse_wxh_16bit_c, 8)));

INSTANTIATE_TEST_SUITE_P(
    C, Mse16xHTest,
    ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_c, 8),
                      Mse16xHParams(3, 2, &aom_mse_16xh_16bit_c, 8),
                      Mse16xHParams(2, 3, &aom_mse_16xh_16bit_c, 8),
                      Mse16xHParams(2, 2, &aom_mse_16xh_16bit_c, 8)));

INSTANTIATE_TEST_SUITE_P(C, SumOfSquaresTest,
                         ::testing::Values(aom_get_mb_ss_c));

typedef TestParams<VarianceMxNFunc> MseParams;
INSTANTIATE_TEST_SUITE_P(C, AvxMseTest,
                         ::testing::Values(MseParams(4, 4, &aom_mse16x16_c),
                                           MseParams(4, 3, &aom_mse16x8_c),
                                           MseParams(3, 4, &aom_mse8x16_c),
                                           MseParams(3, 3, &aom_mse8x8_c)));

typedef TestParams<VarianceMxNFunc> VarianceParams;
// Variance kernels from 128x128 down to 4x4. The six sizes inside the
// CONFIG_REALTIME_ONLY guard (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) are only
// tested when the build is not realtime-only.
const VarianceParams kArrayVariance_c[] = {
  VarianceParams(7, 7, &aom_variance128x128_c),
  VarianceParams(7, 6, &aom_variance128x64_c),
  VarianceParams(6, 7, &aom_variance64x128_c),
  VarianceParams(6, 6, &aom_variance64x64_c),
  VarianceParams(6, 5, &aom_variance64x32_c),
  VarianceParams(5, 6, &aom_variance32x64_c),
  VarianceParams(5, 5, &aom_variance32x32_c),
  VarianceParams(5, 4, &aom_variance32x16_c),
  VarianceParams(4, 5, &aom_variance16x32_c),
  VarianceParams(4, 4, &aom_variance16x16_c),
  VarianceParams(4, 3, &aom_variance16x8_c),
  VarianceParams(3, 4, &aom_variance8x16_c),
  VarianceParams(3, 3, &aom_variance8x8_c),
  VarianceParams(3, 2, &aom_variance8x4_c),
  VarianceParams(2, 3, &aom_variance4x8_c),
  VarianceParams(2, 2, &aom_variance4x4_c),
#if !CONFIG_REALTIME_ONLY
  VarianceParams(6, 4, &aom_variance64x16_c),
  VarianceParams(4, 6, &aom_variance16x64_c),
  VarianceParams(5, 3, &aom_variance32x8_c),
  VarianceParams(3, 5, &aom_variance8x32_c),
  VarianceParams(4, 2, &aom_variance16x4_c),
  VarianceParams(2, 4, &aom_variance4x16_c),
#endif
};
INSTANTIATE_TEST_SUITE_P(C, AvxVarianceTest,
                         ::testing::ValuesIn(kArrayVariance_c));
1798
1799 typedef TestParams<GetSseSum8x8QuadFunc> GetSseSumParams;
1800 const GetSseSumParams kArrayGetSseSum8x8Quad_c[] = {
1801 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_c, 0),
1802 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_c, 0),
1803 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_c, 0),
1804 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_c, 0)
1805 };
1806 INSTANTIATE_TEST_SUITE_P(C, GetSseSum8x8QuadTest,
1807 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_c));
1808
1809 typedef TestParams<GetSseSum16x16DualFunc> GetSseSumParamsDual;
1810 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_c[] = {
1811 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_c, 0),
1812 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_c, 0),
1813 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_c, 0),
1814 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_c, 0)
1815 };
1816
1817 INSTANTIATE_TEST_SUITE_P(C, GetSseSum16x16DualTest,
1818 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_c));
1819
1820 typedef TestParams<SubpixVarMxNFunc> SubpelVarianceParams;
1821 const SubpelVarianceParams kArraySubpelVariance_c[] = {
1822 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_c, 0),
1823 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_c, 0),
1824 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_c, 0),
1825 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_c, 0),
1826 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_c, 0),
1827 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_c, 0),
1828 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_c, 0),
1829 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_c, 0),
1830 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_c, 0),
1831 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_c, 0),
1832 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_c, 0),
1833 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_c, 0),
1834 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_c, 0),
1835 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_c, 0),
1836 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_c, 0),
1837 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_c, 0),
1838 #if !CONFIG_REALTIME_ONLY
1839 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_c, 0),
1840 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_c, 0),
1841 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_c, 0),
1842 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_c, 0),
1843 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_c, 0),
1844 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_c, 0),
1845 #endif
1846 };
1847 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelVarianceTest,
1848 ::testing::ValuesIn(kArraySubpelVariance_c));
1849
1850 typedef TestParams<SubpixAvgVarMxNFunc> SubpelAvgVarianceParams;
1851 const SubpelAvgVarianceParams kArraySubpelAvgVariance_c[] = {
1852 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_c, 0),
1853 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_c, 0),
1854 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_c, 0),
1855 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_c, 0),
1856 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_c, 0),
1857 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_c, 0),
1858 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_c, 0),
1859 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_c, 0),
1860 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_c, 0),
1861 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_c, 0),
1862 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_c, 0),
1863 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_c, 0),
1864 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_c, 0),
1865 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_c, 0),
1866 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_c, 0),
1867 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_c, 0),
1868 #if !CONFIG_REALTIME_ONLY
1869 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_c, 0),
1870 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_c, 0),
1871 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_c, 0),
1872 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_c, 0),
1873 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_c, 0),
1874 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_c, 0),
1875 #endif
1876 };
1877 INSTANTIATE_TEST_SUITE_P(C, AvxSubpelAvgVarianceTest,
1878 ::testing::ValuesIn(kArraySubpelAvgVariance_c));
1879
1880 typedef TestParams<DistWtdSubpixAvgVarMxNFunc> DistWtdSubpelAvgVarianceParams;
1881 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_c[] = {
1882 DistWtdSubpelAvgVarianceParams(
1883 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_c, 0),
1884 DistWtdSubpelAvgVarianceParams(
1885 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_c, 0),
1886 DistWtdSubpelAvgVarianceParams(
1887 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_c, 0),
1888 DistWtdSubpelAvgVarianceParams(
1889 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_c, 0),
1890 DistWtdSubpelAvgVarianceParams(
1891 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_c, 0),
1892 DistWtdSubpelAvgVarianceParams(
1893 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_c, 0),
1894 DistWtdSubpelAvgVarianceParams(
1895 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_c, 0),
1896 DistWtdSubpelAvgVarianceParams(4, 3,
1897 &aom_dist_wtd_sub_pixel_avg_variance16x8_c, 0),
1898 DistWtdSubpelAvgVarianceParams(3, 4,
1899 &aom_dist_wtd_sub_pixel_avg_variance8x16_c, 0),
1900 DistWtdSubpelAvgVarianceParams(3, 3,
1901 &aom_dist_wtd_sub_pixel_avg_variance8x8_c, 0),
1902 DistWtdSubpelAvgVarianceParams(3, 2,
1903 &aom_dist_wtd_sub_pixel_avg_variance8x4_c, 0),
1904 DistWtdSubpelAvgVarianceParams(2, 3,
1905 &aom_dist_wtd_sub_pixel_avg_variance4x8_c, 0),
1906 DistWtdSubpelAvgVarianceParams(2, 2,
1907 &aom_dist_wtd_sub_pixel_avg_variance4x4_c, 0),
1908 #if !CONFIG_REALTIME_ONLY
1909
1910 DistWtdSubpelAvgVarianceParams(
1911 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_c, 0),
1912 DistWtdSubpelAvgVarianceParams(
1913 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_c, 0),
1914 DistWtdSubpelAvgVarianceParams(5, 3,
1915 &aom_dist_wtd_sub_pixel_avg_variance32x8_c, 0),
1916 DistWtdSubpelAvgVarianceParams(3, 5,
1917 &aom_dist_wtd_sub_pixel_avg_variance8x32_c, 0),
1918 DistWtdSubpelAvgVarianceParams(4, 2,
1919 &aom_dist_wtd_sub_pixel_avg_variance16x4_c, 0),
1920 DistWtdSubpelAvgVarianceParams(2, 4,
1921 &aom_dist_wtd_sub_pixel_avg_variance4x16_c, 0),
1922 #endif
1923 };
1924 INSTANTIATE_TEST_SUITE_P(C, AvxDistWtdSubpelAvgVarianceTest,
1925 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_c));
1926
1927 #if !CONFIG_REALTIME_ONLY
1928 INSTANTIATE_TEST_SUITE_P(
1929 C, AvxObmcSubpelVarianceTest,
1930 ::testing::Values(
1931 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_c,
1932 0),
1933 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_c, 0),
1934 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_c, 0),
1935 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_c, 0),
1936 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_c, 0),
1937 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_c, 0),
1938 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_c, 0),
1939 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_c, 0),
1940 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_c, 0),
1941 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_c, 0),
1942 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_c, 0),
1943 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_c, 0),
1944 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_c, 0),
1945 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_c, 0),
1946 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_c, 0),
1947 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_c, 0),
1948
1949 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_c, 0),
1950 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_c, 0),
1951 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_c, 0),
1952 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_c, 0),
1953 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_c, 0),
1954 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_c, 0)));
1955 #endif
1956
1957 #if CONFIG_AV1_HIGHBITDEPTH
// Signature of the high-bitdepth WxH MSE kernels: both dst and src are 16-bit
// buffers with their own strides, followed by the block width and height.
using MseHBDWxH16bitFunc = uint64_t (*)(uint16_t *dst, int dstride,
                                        uint16_t *src, int sstride, int w,
                                        int h);
1961
1962 template <typename FunctionType>
1963 class MseHBDWxHTestClass
1964 : public ::testing::TestWithParam<TestParams<FunctionType> > {
1965 public:
SetUp()1966 void SetUp() override {
1967 params_ = this->GetParam();
1968
1969 rnd_.Reset(ACMRandom::DeterministicSeed());
1970 src_ = reinterpret_cast<uint16_t *>(
1971 aom_memalign(16, block_size() * sizeof(src_)));
1972 dst_ = reinterpret_cast<uint16_t *>(
1973 aom_memalign(16, block_size() * sizeof(dst_)));
1974 ASSERT_NE(src_, nullptr);
1975 ASSERT_NE(dst_, nullptr);
1976 }
1977
TearDown()1978 void TearDown() override {
1979 aom_free(src_);
1980 aom_free(dst_);
1981 src_ = nullptr;
1982 dst_ = nullptr;
1983 }
1984
1985 protected:
1986 void RefMatchTestMse();
1987 void SpeedTest();
1988
1989 protected:
1990 ACMRandom rnd_;
1991 uint16_t *dst_;
1992 uint16_t *src_;
1993 TestParams<FunctionType> params_;
1994
1995 // some relay helpers
block_size() const1996 int block_size() const { return params_.block_size; }
width() const1997 int width() const { return params_.width; }
d_stride() const1998 int d_stride() const { return params_.width; } // stride is same as width
s_stride() const1999 int s_stride() const { return params_.width; } // stride is same as width
height() const2000 int height() const { return params_.height; }
mask() const2001 int mask() const { return params_.mask; }
2002 };
2003
2004 template <typename MseHBDWxHFunctionType>
SpeedTest()2005 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::SpeedTest() {
2006 aom_usec_timer ref_timer, test_timer;
2007 double elapsed_time_c = 0;
2008 double elapsed_time_simd = 0;
2009 int run_time = 10000000;
2010 int w = width();
2011 int h = height();
2012 int dstride = d_stride();
2013 int sstride = s_stride();
2014 for (int k = 0; k < block_size(); ++k) {
2015 dst_[k] = rnd_.Rand16() & mask();
2016 src_[k] = rnd_.Rand16() & mask();
2017 }
2018 aom_usec_timer_start(&ref_timer);
2019 for (int i = 0; i < run_time; i++) {
2020 aom_mse_wxh_16bit_highbd_c(dst_, dstride, src_, sstride, w, h);
2021 }
2022 aom_usec_timer_mark(&ref_timer);
2023 elapsed_time_c = static_cast<double>(aom_usec_timer_elapsed(&ref_timer));
2024
2025 aom_usec_timer_start(&test_timer);
2026 for (int i = 0; i < run_time; i++) {
2027 params_.func(dst_, dstride, src_, sstride, w, h);
2028 }
2029 aom_usec_timer_mark(&test_timer);
2030 elapsed_time_simd = static_cast<double>(aom_usec_timer_elapsed(&test_timer));
2031
2032 printf("%dx%d\tc_time=%lf \t simd_time=%lf \t gain=%lf\n", width(), height(),
2033 elapsed_time_c, elapsed_time_simd,
2034 (elapsed_time_c / elapsed_time_simd));
2035 }
2036
2037 template <typename MseHBDWxHFunctionType>
RefMatchTestMse()2038 void MseHBDWxHTestClass<MseHBDWxHFunctionType>::RefMatchTestMse() {
2039 uint64_t mse_ref = 0;
2040 uint64_t mse_mod = 0;
2041 int w = width();
2042 int h = height();
2043 int dstride = d_stride();
2044 int sstride = s_stride();
2045 for (int i = 0; i < 10; i++) {
2046 for (int k = 0; k < block_size(); ++k) {
2047 dst_[k] = rnd_.Rand16() & mask();
2048 src_[k] = rnd_.Rand16() & mask();
2049 }
2050 API_REGISTER_STATE_CHECK(mse_ref = aom_mse_wxh_16bit_highbd_c(
2051 dst_, dstride, src_, sstride, w, h));
2052 API_REGISTER_STATE_CHECK(
2053 mse_mod = params_.func(dst_, dstride, src_, sstride, w, h));
2054 EXPECT_EQ(mse_ref, mse_mod)
2055 << "ref mse: " << mse_ref << " mod mse: " << mse_mod;
2056 }
2057 }
2058
2059 typedef TestParams<MseHBDWxH16bitFunc> MseHBDWxHParams;
2060 typedef MseHBDWxHTestClass<MseHBDWxH16bitFunc> MseHBDWxHTest;
2061 typedef MainTestClass<VarianceMxNFunc> AvxHBDMseTest;
2062 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDMseTest);
2063 typedef MainTestClass<VarianceMxNFunc> AvxHBDVarianceTest;
2064 typedef SubpelVarianceTest<SubpixVarMxNFunc> AvxHBDSubpelVarianceTest;
2065 typedef SubpelVarianceTest<SubpixAvgVarMxNFunc> AvxHBDSubpelAvgVarianceTest;
2066 typedef SubpelVarianceTest<DistWtdSubpixAvgVarMxNFunc>
2067 AvxHBDDistWtdSubpelAvgVarianceTest;
2068 #if !CONFIG_REALTIME_ONLY
2069 typedef ObmcVarianceTest<ObmcSubpelVarFunc> AvxHBDObmcSubpelVarianceTest;
2070 #endif
2071 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AvxHBDObmcSubpelVarianceTest);
2072
// High-bitdepth test cases. Every body forwards to exactly one fixture
// method. Cases whose name begins with DISABLED_ are not run by googletest
// unless disabled tests are explicitly requested on the command line.
TEST_P(MseHBDWxHTest, RefMse) {
  RefMatchTestMse();
}
TEST_P(MseHBDWxHTest, DISABLED_SpeedMse) {
  SpeedTest();
}

TEST_P(AvxHBDMseTest, RefMse) {
  RefTestMse();
}
TEST_P(AvxHBDMseTest, MaxMse) {
  MaxTestMse();
}
TEST_P(AvxHBDMseTest, DISABLED_SpeedMse) {
  SpeedTest();
}

TEST_P(AvxHBDVarianceTest, Zero) {
  ZeroTest();
}
TEST_P(AvxHBDVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDVarianceTest, RefStride) {
  RefStrideTest();
}
TEST_P(AvxHBDVarianceTest, OneQuarter) {
  OneQuarterTest();
}
TEST_P(AvxHBDVarianceTest, DISABLED_Speed) {
  SpeedTest();
}

TEST_P(AvxHBDSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxHBDSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}

TEST_P(AvxHBDSubpelAvgVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDDistWtdSubpelAvgVarianceTest, Ref) {
  RefTest();
}

// The OBMC sub-pixel variance cases are compiled out of realtime-only builds.
#if !CONFIG_REALTIME_ONLY
TEST_P(AvxHBDObmcSubpelVarianceTest, Ref) {
  RefTest();
}
TEST_P(AvxHBDObmcSubpelVarianceTest, ExtremeRef) {
  ExtremeRefTest();
}
TEST_P(AvxHBDObmcSubpelVarianceTest, DISABLED_Speed) {
  SpeedTest();
}
#endif
2093
2094 INSTANTIATE_TEST_SUITE_P(
2095 C, MseHBDWxHTest,
2096 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2097 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_c, 10),
2098 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_c, 10),
2099 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_c, 10)));
2100
2101 INSTANTIATE_TEST_SUITE_P(
2102 C, AvxHBDMseTest,
2103 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_c, 12),
2104 MseParams(4, 3, &aom_highbd_12_mse16x8_c, 12),
2105 MseParams(3, 4, &aom_highbd_12_mse8x16_c, 12),
2106 MseParams(3, 3, &aom_highbd_12_mse8x8_c, 12),
2107 MseParams(4, 4, &aom_highbd_10_mse16x16_c, 10),
2108 MseParams(4, 3, &aom_highbd_10_mse16x8_c, 10),
2109 MseParams(3, 4, &aom_highbd_10_mse8x16_c, 10),
2110 MseParams(3, 3, &aom_highbd_10_mse8x8_c, 10),
2111 MseParams(4, 4, &aom_highbd_8_mse16x16_c, 8),
2112 MseParams(4, 3, &aom_highbd_8_mse16x8_c, 8),
2113 MseParams(3, 4, &aom_highbd_8_mse8x16_c, 8),
2114 MseParams(3, 3, &aom_highbd_8_mse8x8_c, 8)));
2115
// NEON instantiations of the two high-bitdepth MSE suites; they mirror the
// C instantiations entry for entry with the _neon functions.
#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, MseHBDWxHTest,
    ::testing::Values(
        MseHBDWxHParams(3, 3, aom_mse_wxh_16bit_highbd_neon, 10),
        MseHBDWxHParams(3, 2, aom_mse_wxh_16bit_highbd_neon, 10),
        MseHBDWxHParams(2, 3, aom_mse_wxh_16bit_highbd_neon, 10),
        MseHBDWxHParams(2, 2, aom_mse_wxh_16bit_highbd_neon, 10)));

INSTANTIATE_TEST_SUITE_P(
    NEON, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_12_mse16x16_neon, 12),
                      MseParams(4, 3, aom_highbd_12_mse16x8_neon, 12),
                      MseParams(3, 4, aom_highbd_12_mse8x16_neon, 12),
                      MseParams(3, 3, aom_highbd_12_mse8x8_neon, 12),
                      MseParams(4, 4, aom_highbd_10_mse16x16_neon, 10),
                      MseParams(4, 3, aom_highbd_10_mse16x8_neon, 10),
                      MseParams(3, 4, aom_highbd_10_mse8x16_neon, 10),
                      MseParams(3, 3, aom_highbd_10_mse8x8_neon, 10),
                      MseParams(4, 4, aom_highbd_8_mse16x16_neon, 8),
                      MseParams(4, 3, aom_highbd_8_mse16x8_neon, 8),
                      MseParams(3, 4, aom_highbd_8_mse8x16_neon, 8),
                      MseParams(3, 3, aom_highbd_8_mse8x8_neon, 8)));
#endif  // HAVE_NEON
2140
// NEON_DOTPROD instantiation: only the 8-bit high-bitdepth MSE functions are
// listed here.
#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(
    NEON_DOTPROD, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_8_mse16x16_neon_dotprod, 8),
                      MseParams(4, 3, aom_highbd_8_mse16x8_neon_dotprod, 8),
                      MseParams(3, 4, aom_highbd_8_mse8x16_neon_dotprod, 8),
                      MseParams(3, 3, aom_highbd_8_mse8x8_neon_dotprod, 8)));
#endif  // HAVE_NEON_DOTPROD
2149
// SVE instantiations of the two high-bitdepth MSE suites. The AvxHBDMseTest
// list covers the 12- and 10-bit functions only; no 8-bit entries appear.
#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, MseHBDWxHTest,
    ::testing::Values(
        MseHBDWxHParams(3, 3, aom_mse_wxh_16bit_highbd_sve, 10),
        MseHBDWxHParams(3, 2, aom_mse_wxh_16bit_highbd_sve, 10),
        MseHBDWxHParams(2, 3, aom_mse_wxh_16bit_highbd_sve, 10),
        MseHBDWxHParams(2, 2, aom_mse_wxh_16bit_highbd_sve, 10)));

INSTANTIATE_TEST_SUITE_P(
    SVE, AvxHBDMseTest,
    ::testing::Values(MseParams(4, 4, aom_highbd_12_mse16x16_sve, 12),
                      MseParams(4, 3, aom_highbd_12_mse16x8_sve, 12),
                      MseParams(3, 4, aom_highbd_12_mse8x16_sve, 12),
                      MseParams(3, 3, aom_highbd_12_mse8x8_sve, 12),
                      MseParams(4, 4, aom_highbd_10_mse16x16_sve, 10),
                      MseParams(4, 3, aom_highbd_10_mse16x8_sve, 10),
                      MseParams(3, 4, aom_highbd_10_mse8x16_sve, 10),
                      MseParams(3, 3, aom_highbd_10_mse8x8_sve, 10)));
#endif  // HAVE_SVE
2170
2171 const VarianceParams kArrayHBDVariance_c[] = {
2172 VarianceParams(7, 7, &aom_highbd_12_variance128x128_c, 12),
2173 VarianceParams(7, 6, &aom_highbd_12_variance128x64_c, 12),
2174 VarianceParams(6, 7, &aom_highbd_12_variance64x128_c, 12),
2175 VarianceParams(6, 6, &aom_highbd_12_variance64x64_c, 12),
2176 VarianceParams(6, 5, &aom_highbd_12_variance64x32_c, 12),
2177 VarianceParams(5, 6, &aom_highbd_12_variance32x64_c, 12),
2178 VarianceParams(5, 5, &aom_highbd_12_variance32x32_c, 12),
2179 VarianceParams(5, 4, &aom_highbd_12_variance32x16_c, 12),
2180 VarianceParams(4, 5, &aom_highbd_12_variance16x32_c, 12),
2181 VarianceParams(4, 4, &aom_highbd_12_variance16x16_c, 12),
2182 VarianceParams(4, 3, &aom_highbd_12_variance16x8_c, 12),
2183 VarianceParams(3, 4, &aom_highbd_12_variance8x16_c, 12),
2184 VarianceParams(3, 3, &aom_highbd_12_variance8x8_c, 12),
2185 VarianceParams(3, 2, &aom_highbd_12_variance8x4_c, 12),
2186 VarianceParams(2, 3, &aom_highbd_12_variance4x8_c, 12),
2187 VarianceParams(2, 2, &aom_highbd_12_variance4x4_c, 12),
2188 VarianceParams(7, 7, &aom_highbd_10_variance128x128_c, 10),
2189 VarianceParams(7, 6, &aom_highbd_10_variance128x64_c, 10),
2190 VarianceParams(6, 7, &aom_highbd_10_variance64x128_c, 10),
2191 VarianceParams(6, 6, &aom_highbd_10_variance64x64_c, 10),
2192 VarianceParams(6, 5, &aom_highbd_10_variance64x32_c, 10),
2193 VarianceParams(5, 6, &aom_highbd_10_variance32x64_c, 10),
2194 VarianceParams(5, 5, &aom_highbd_10_variance32x32_c, 10),
2195 VarianceParams(5, 4, &aom_highbd_10_variance32x16_c, 10),
2196 VarianceParams(4, 5, &aom_highbd_10_variance16x32_c, 10),
2197 VarianceParams(4, 4, &aom_highbd_10_variance16x16_c, 10),
2198 VarianceParams(4, 3, &aom_highbd_10_variance16x8_c, 10),
2199 VarianceParams(3, 4, &aom_highbd_10_variance8x16_c, 10),
2200 VarianceParams(3, 3, &aom_highbd_10_variance8x8_c, 10),
2201 VarianceParams(3, 2, &aom_highbd_10_variance8x4_c, 10),
2202 VarianceParams(2, 3, &aom_highbd_10_variance4x8_c, 10),
2203 VarianceParams(2, 2, &aom_highbd_10_variance4x4_c, 10),
2204 VarianceParams(7, 7, &aom_highbd_8_variance128x128_c, 8),
2205 VarianceParams(7, 6, &aom_highbd_8_variance128x64_c, 8),
2206 VarianceParams(6, 7, &aom_highbd_8_variance64x128_c, 8),
2207 VarianceParams(6, 6, &aom_highbd_8_variance64x64_c, 8),
2208 VarianceParams(6, 5, &aom_highbd_8_variance64x32_c, 8),
2209 VarianceParams(5, 6, &aom_highbd_8_variance32x64_c, 8),
2210 VarianceParams(5, 5, &aom_highbd_8_variance32x32_c, 8),
2211 VarianceParams(5, 4, &aom_highbd_8_variance32x16_c, 8),
2212 VarianceParams(4, 5, &aom_highbd_8_variance16x32_c, 8),
2213 VarianceParams(4, 4, &aom_highbd_8_variance16x16_c, 8),
2214 VarianceParams(4, 3, &aom_highbd_8_variance16x8_c, 8),
2215 VarianceParams(3, 4, &aom_highbd_8_variance8x16_c, 8),
2216 VarianceParams(3, 3, &aom_highbd_8_variance8x8_c, 8),
2217 VarianceParams(3, 2, &aom_highbd_8_variance8x4_c, 8),
2218 VarianceParams(2, 3, &aom_highbd_8_variance4x8_c, 8),
2219 VarianceParams(2, 2, &aom_highbd_8_variance4x4_c, 8),
2220 #if !CONFIG_REALTIME_ONLY
2221 VarianceParams(6, 4, &aom_highbd_12_variance64x16_c, 12),
2222 VarianceParams(4, 6, &aom_highbd_12_variance16x64_c, 12),
2223 VarianceParams(5, 3, &aom_highbd_12_variance32x8_c, 12),
2224 VarianceParams(3, 5, &aom_highbd_12_variance8x32_c, 12),
2225 VarianceParams(4, 2, &aom_highbd_12_variance16x4_c, 12),
2226 VarianceParams(2, 4, &aom_highbd_12_variance4x16_c, 12),
2227 VarianceParams(6, 4, &aom_highbd_10_variance64x16_c, 10),
2228 VarianceParams(4, 6, &aom_highbd_10_variance16x64_c, 10),
2229 VarianceParams(5, 3, &aom_highbd_10_variance32x8_c, 10),
2230 VarianceParams(3, 5, &aom_highbd_10_variance8x32_c, 10),
2231 VarianceParams(4, 2, &aom_highbd_10_variance16x4_c, 10),
2232 VarianceParams(2, 4, &aom_highbd_10_variance4x16_c, 10),
2233 VarianceParams(6, 4, &aom_highbd_8_variance64x16_c, 8),
2234 VarianceParams(4, 6, &aom_highbd_8_variance16x64_c, 8),
2235 VarianceParams(5, 3, &aom_highbd_8_variance32x8_c, 8),
2236 VarianceParams(3, 5, &aom_highbd_8_variance8x32_c, 8),
2237 VarianceParams(4, 2, &aom_highbd_8_variance16x4_c, 8),
2238 VarianceParams(2, 4, &aom_highbd_8_variance4x16_c, 8),
2239 #endif
2240 };
2241 INSTANTIATE_TEST_SUITE_P(C, AvxHBDVarianceTest,
2242 ::testing::ValuesIn(kArrayHBDVariance_c));
2243
// SSE4.1 instantiation: only the 4x4 high-bitdepth variance functions are
// listed, one per bit depth.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, AvxHBDVarianceTest,
    ::testing::Values(
        VarianceParams(2, 2, aom_highbd_8_variance4x4_sse4_1, 8),
        VarianceParams(2, 2, aom_highbd_10_variance4x4_sse4_1, 10),
        VarianceParams(2, 2, aom_highbd_12_variance4x4_sse4_1, 12)));
#endif  // HAVE_SSE4_1
2252
2253 const SubpelVarianceParams kArrayHBDSubpelVariance_c[] = {
2254 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_c, 8),
2255 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_c, 8),
2256 SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_c, 8),
2257 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_c, 8),
2258 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_c, 8),
2259 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_c, 8),
2260 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_c, 8),
2261 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_c, 8),
2262 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_c, 8),
2263 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_c, 8),
2264 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_c, 8),
2265 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_c, 8),
2266 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_c, 8),
2267 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_c, 8),
2268 SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_c, 8),
2269 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_c, 8),
2270 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_c, 10),
2271 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_c, 10),
2272 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_c, 10),
2273 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_c, 10),
2274 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_c, 10),
2275 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_c, 10),
2276 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_c, 10),
2277 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_c, 10),
2278 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_c, 10),
2279 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_c, 10),
2280 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_c, 10),
2281 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_c, 10),
2282 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_c, 10),
2283 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_c, 10),
2284 SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_c, 10),
2285 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_c, 10),
2286 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_c, 12),
2287 SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_c, 12),
2288 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_c, 12),
2289 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_c, 12),
2290 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_c, 12),
2291 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_c, 12),
2292 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_c, 12),
2293 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_c, 12),
2294 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_c, 12),
2295 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_c, 12),
2296 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_c, 12),
2297 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_c, 12),
2298 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_c, 12),
2299 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_c, 12),
2300 SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_c, 12),
2301 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_c, 12),
2302 #if !CONFIG_REALTIME_ONLY
2303 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_c, 8),
2304 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_c, 8),
2305 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_c, 8),
2306 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_c, 8),
2307 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_c, 8),
2308 SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_c, 8),
2309 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_c, 10),
2310 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_c, 10),
2311 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_c, 10),
2312 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_c, 10),
2313 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_c, 10),
2314 SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_c, 10),
2315 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_c, 12),
2316 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_c, 12),
2317 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_c, 12),
2318 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_c, 12),
2319 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_c, 12),
2320 SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_c, 12),
2321 #endif
2322 };
2323 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelVarianceTest,
2324 ::testing::ValuesIn(kArrayHBDSubpelVariance_c));
2325
// Parameter table for the C ("_c") high-bit-depth sub-pixel average variance
// functions, followed by its registration with AvxHBDSubpelAvgVarianceTest.
//
// Each entry passes two small integers, a function pointer and a bit depth.
// In every row the two integers equal log2 of the width and height that
// appear in the function name (e.g. 7, 6 <-> 128x64), and the last argument
// equals the 8/10/12 in the function name.
// NOTE(review): the params type is declared outside this chunk; the
// "log2 width, log2 height, function, bit depth" reading is inferred from
// the rows themselves - confirm against the type definition.
2326 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_c[] = {
// 8-bit functions, 128x128 down to 4x4.
2327 SubpelAvgVarianceParams(7, 7, &aom_highbd_8_sub_pixel_avg_variance128x128_c,
2328 8),
2329 SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_c,
2330 8),
2331 SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_c,
2332 8),
2333 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_c, 8),
2334 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_c, 8),
2335 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_c, 8),
2336 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_c, 8),
2337 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_c, 8),
2338 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_c, 8),
2339 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_c, 8),
2340 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_c, 8),
2341 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_c, 8),
2342 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_c, 8),
2343 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_c, 8),
2344 SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_c, 8),
2345 SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_c, 8),
// 10-bit functions, same sizes in the same order.
2346 SubpelAvgVarianceParams(7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_c,
2347 10),
2348 SubpelAvgVarianceParams(7, 6, &aom_highbd_10_sub_pixel_avg_variance128x64_c,
2349 10),
2350 SubpelAvgVarianceParams(6, 7, &aom_highbd_10_sub_pixel_avg_variance64x128_c,
2351 10),
2352 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_c,
2353 10),
2354 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_c,
2355 10),
2356 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_c,
2357 10),
2358 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_c,
2359 10),
2360 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_c,
2361 10),
2362 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_c,
2363 10),
2364 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_c,
2365 10),
2366 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_c,
2367 10),
2368 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_c,
2369 10),
2370 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_c, 10),
2371 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_c, 10),
2372 SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_c, 10),
2373 SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_c, 10),
// 12-bit functions, same sizes in the same order.
2374 SubpelAvgVarianceParams(7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_c,
2375 12),
2376 SubpelAvgVarianceParams(7, 6, &aom_highbd_12_sub_pixel_avg_variance128x64_c,
2377 12),
2378 SubpelAvgVarianceParams(6, 7, &aom_highbd_12_sub_pixel_avg_variance64x128_c,
2379 12),
2380 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_c,
2381 12),
2382 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_c,
2383 12),
2384 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_c,
2385 12),
2386 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_c,
2387 12),
2388 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_c,
2389 12),
2390 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_c,
2391 12),
2392 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_c,
2393 12),
2394 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_c,
2395 12),
2396 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_c,
2397 12),
2398 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_c, 12),
2399 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_c, 12),
2400 SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_c, 12),
2401 SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_c, 12),
2402
// Extra sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) for each bit depth;
// these rows are compiled only when CONFIG_REALTIME_ONLY is 0.
2403 #if !CONFIG_REALTIME_ONLY
2404 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_c, 8),
2405 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_c, 8),
2406 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_c, 8),
2407 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_c, 8),
2408 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_c, 8),
2409 SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_c, 8),
2410 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_c,
2411 10),
2412 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_c,
2413 10),
2414 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_c,
2415 10),
2416 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_c,
2417 10),
2418 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_c,
2419 10),
2420 SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_c,
2421 10),
2422 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_c,
2423 12),
2424 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_c,
2425 12),
2426 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_c,
2427 12),
2428 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_c,
2429 12),
2430 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_c,
2431 12),
2432 SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_c,
2433 12),
2434 #endif
2435 };
// Feed the whole table to AvxHBDSubpelAvgVarianceTest under the prefix "C".
2436 INSTANTIATE_TEST_SUITE_P(C, AvxHBDSubpelAvgVarianceTest,
2437 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
2438
// Parameter table for the C ("_c") high-bit-depth distance-weighted
// ("dist_wtd") sub-pixel average variance functions, followed by its
// registration with AvxHBDDistWtdSubpelAvgVarianceTest.
//
// Row layout matches the other tables in this file: two integers that equal
// log2 of the width/height in the function name, the function pointer, and a
// bit depth equal to the 8/10/12 in the function name.
// NOTE(review): the meaning of the integer arguments is inferred from the
// rows; the params type itself is declared outside this chunk.
2439 const DistWtdSubpelAvgVarianceParams kArrayHBDDistWtdSubpelAvgVariance_c[] = {
// 8-bit functions, 128x128 down to 4x4.
2440 DistWtdSubpelAvgVarianceParams(
2441 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_c, 8),
2442 DistWtdSubpelAvgVarianceParams(
2443 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_c, 8),
2444 DistWtdSubpelAvgVarianceParams(
2445 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_c, 8),
2446 DistWtdSubpelAvgVarianceParams(
2447 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_c, 8),
2448 DistWtdSubpelAvgVarianceParams(
2449 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_c, 8),
2450 DistWtdSubpelAvgVarianceParams(
2451 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_c, 8),
2452 DistWtdSubpelAvgVarianceParams(
2453 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_c, 8),
2454 DistWtdSubpelAvgVarianceParams(
2455 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_c, 8),
2456 DistWtdSubpelAvgVarianceParams(
2457 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_c, 8),
2458 DistWtdSubpelAvgVarianceParams(
2459 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_c, 8),
2460 DistWtdSubpelAvgVarianceParams(
2461 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_c, 8),
2462 DistWtdSubpelAvgVarianceParams(
2463 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_c, 8),
2464 DistWtdSubpelAvgVarianceParams(
2465 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_c, 8),
2466 DistWtdSubpelAvgVarianceParams(
2467 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_c, 8),
2468 DistWtdSubpelAvgVarianceParams(
2469 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_c, 8),
2470 DistWtdSubpelAvgVarianceParams(
2471 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_c, 8),
// 10-bit functions, same sizes in the same order.
2472 DistWtdSubpelAvgVarianceParams(
2473 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_c, 10),
2474 DistWtdSubpelAvgVarianceParams(
2475 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_c, 10),
2476 DistWtdSubpelAvgVarianceParams(
2477 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_c, 10),
2478 DistWtdSubpelAvgVarianceParams(
2479 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_c, 10),
2480 DistWtdSubpelAvgVarianceParams(
2481 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_c, 10),
2482 DistWtdSubpelAvgVarianceParams(
2483 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_c, 10),
2484 DistWtdSubpelAvgVarianceParams(
2485 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_c, 10),
2486 DistWtdSubpelAvgVarianceParams(
2487 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_c, 10),
2488 DistWtdSubpelAvgVarianceParams(
2489 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_c, 10),
2490 DistWtdSubpelAvgVarianceParams(
2491 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_c, 10),
2492 DistWtdSubpelAvgVarianceParams(
2493 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_c, 10),
2494 DistWtdSubpelAvgVarianceParams(
2495 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_c, 10),
2496 DistWtdSubpelAvgVarianceParams(
2497 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_c, 10),
2498 DistWtdSubpelAvgVarianceParams(
2499 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_c, 10),
2500 DistWtdSubpelAvgVarianceParams(
2501 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_c, 10),
2502 DistWtdSubpelAvgVarianceParams(
2503 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_c, 10),
// 12-bit functions, same sizes in the same order.
2504 DistWtdSubpelAvgVarianceParams(
2505 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_c, 12),
2506 DistWtdSubpelAvgVarianceParams(
2507 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_c, 12),
2508 DistWtdSubpelAvgVarianceParams(
2509 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_c, 12),
2510 DistWtdSubpelAvgVarianceParams(
2511 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_c, 12),
2512 DistWtdSubpelAvgVarianceParams(
2513 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_c, 12),
2514 DistWtdSubpelAvgVarianceParams(
2515 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_c, 12),
2516 DistWtdSubpelAvgVarianceParams(
2517 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_c, 12),
2518 DistWtdSubpelAvgVarianceParams(
2519 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_c, 12),
2520 DistWtdSubpelAvgVarianceParams(
2521 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_c, 12),
2522 DistWtdSubpelAvgVarianceParams(
2523 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_c, 12),
2524 DistWtdSubpelAvgVarianceParams(
2525 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_c, 12),
2526 DistWtdSubpelAvgVarianceParams(
2527 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_c, 12),
2528 DistWtdSubpelAvgVarianceParams(
2529 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_c, 12),
2530 DistWtdSubpelAvgVarianceParams(
2531 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_c, 12),
2532 DistWtdSubpelAvgVarianceParams(
2533 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_c, 12),
2534 DistWtdSubpelAvgVarianceParams(
2535 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_c, 12),
2536
// Extra sizes (64x16, 16x64, 32x8, 8x32, 16x4, 4x16) for each bit depth;
// these rows are compiled only when CONFIG_REALTIME_ONLY is 0.
2537 #if !CONFIG_REALTIME_ONLY
2538 DistWtdSubpelAvgVarianceParams(
2539 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_c, 8),
2540 DistWtdSubpelAvgVarianceParams(
2541 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_c, 8),
2542 DistWtdSubpelAvgVarianceParams(
2543 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_c, 8),
2544 DistWtdSubpelAvgVarianceParams(
2545 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_c, 8),
2546 DistWtdSubpelAvgVarianceParams(
2547 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_c, 8),
2548 DistWtdSubpelAvgVarianceParams(
2549 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_c, 8),
2550 DistWtdSubpelAvgVarianceParams(
2551 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_c, 10),
2552 DistWtdSubpelAvgVarianceParams(
2553 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_c, 10),
2554 DistWtdSubpelAvgVarianceParams(
2555 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_c, 10),
2556 DistWtdSubpelAvgVarianceParams(
2557 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_c, 10),
2558 DistWtdSubpelAvgVarianceParams(
2559 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_c, 10),
2560 DistWtdSubpelAvgVarianceParams(
2561 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_c, 10),
2562 DistWtdSubpelAvgVarianceParams(
2563 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_c, 12),
2564 DistWtdSubpelAvgVarianceParams(
2565 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_c, 12),
2566 DistWtdSubpelAvgVarianceParams(
2567 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_c, 12),
2568 DistWtdSubpelAvgVarianceParams(
2569 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_c, 12),
2570 DistWtdSubpelAvgVarianceParams(
2571 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_c, 12),
2572 DistWtdSubpelAvgVarianceParams(
2573 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_c, 12),
2574 #endif
2575 };
// Feed the whole table to AvxHBDDistWtdSubpelAvgVarianceTest under the
// prefix "C".
2576 INSTANTIATE_TEST_SUITE_P(
2577 C, AvxHBDDistWtdSubpelAvgVarianceTest,
2578 ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_c));
2579
2580 #if !CONFIG_REALTIME_ONLY
// Parameter table for the C ("_c") high-bit-depth OBMC sub-pixel variance
// functions, followed by its registration with AvxHBDObmcSubpelVarianceTest.
//
// Unlike the neighbouring tables, the whole table and its instantiation sit
// inside the enclosing `#if !CONFIG_REALTIME_ONLY` region, so there is no
// inner guard around the 64x16 / 16x64 / 32x8 / 8x32 / 16x4 / 4x16 rows.
//
// Row layout: two integers that equal log2 of the width/height in the
// function name, the function pointer, and a bit depth equal to the 8/10/12
// in the function name.
// NOTE(review): the meaning of the integer arguments is inferred from the
// rows; the params type itself is declared outside this chunk.
2581 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_c[] = {
// 8-bit functions, 128x128 down to 4x4.
2582 ObmcSubpelVarianceParams(7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_c,
2583 8),
2584 ObmcSubpelVarianceParams(7, 6, &aom_highbd_8_obmc_sub_pixel_variance128x64_c,
2585 8),
2586 ObmcSubpelVarianceParams(6, 7, &aom_highbd_8_obmc_sub_pixel_variance64x128_c,
2587 8),
2588 ObmcSubpelVarianceParams(6, 6, &aom_highbd_8_obmc_sub_pixel_variance64x64_c,
2589 8),
2590 ObmcSubpelVarianceParams(6, 5, &aom_highbd_8_obmc_sub_pixel_variance64x32_c,
2591 8),
2592 ObmcSubpelVarianceParams(5, 6, &aom_highbd_8_obmc_sub_pixel_variance32x64_c,
2593 8),
2594 ObmcSubpelVarianceParams(5, 5, &aom_highbd_8_obmc_sub_pixel_variance32x32_c,
2595 8),
2596 ObmcSubpelVarianceParams(5, 4, &aom_highbd_8_obmc_sub_pixel_variance32x16_c,
2597 8),
2598 ObmcSubpelVarianceParams(4, 5, &aom_highbd_8_obmc_sub_pixel_variance16x32_c,
2599 8),
2600 ObmcSubpelVarianceParams(4, 4, &aom_highbd_8_obmc_sub_pixel_variance16x16_c,
2601 8),
2602 ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_c,
2603 8),
2604 ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_c,
2605 8),
2606 ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_c, 8),
2607 ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_c, 8),
2608 ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_c, 8),
2609 ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_c, 8),
// 10-bit functions, same sizes in the same order.
2610 ObmcSubpelVarianceParams(7, 7,
2611 &aom_highbd_10_obmc_sub_pixel_variance128x128_c, 10),
2612 ObmcSubpelVarianceParams(7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_c,
2613 10),
2614 ObmcSubpelVarianceParams(6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_c,
2615 10),
2616 ObmcSubpelVarianceParams(6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_c,
2617 10),
2618 ObmcSubpelVarianceParams(6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_c,
2619 10),
2620 ObmcSubpelVarianceParams(5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_c,
2621 10),
2622 ObmcSubpelVarianceParams(5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_c,
2623 10),
2624 ObmcSubpelVarianceParams(5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_c,
2625 10),
2626 ObmcSubpelVarianceParams(4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_c,
2627 10),
2628 ObmcSubpelVarianceParams(4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_c,
2629 10),
2630 ObmcSubpelVarianceParams(4, 3, &aom_highbd_10_obmc_sub_pixel_variance16x8_c,
2631 10),
2632 ObmcSubpelVarianceParams(3, 4, &aom_highbd_10_obmc_sub_pixel_variance8x16_c,
2633 10),
2634 ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_c,
2635 10),
2636 ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_c,
2637 10),
2638 ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_c,
2639 10),
2640 ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_c,
2641 10),
// 12-bit functions, same sizes in the same order.
2642 ObmcSubpelVarianceParams(7, 7,
2643 &aom_highbd_12_obmc_sub_pixel_variance128x128_c, 12),
2644 ObmcSubpelVarianceParams(7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_c,
2645 12),
2646 ObmcSubpelVarianceParams(6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_c,
2647 12),
2648 ObmcSubpelVarianceParams(6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_c,
2649 12),
2650 ObmcSubpelVarianceParams(6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_c,
2651 12),
2652 ObmcSubpelVarianceParams(5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_c,
2653 12),
2654 ObmcSubpelVarianceParams(5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_c,
2655 12),
2656 ObmcSubpelVarianceParams(5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_c,
2657 12),
2658 ObmcSubpelVarianceParams(4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_c,
2659 12),
2660 ObmcSubpelVarianceParams(4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_c,
2661 12),
2662 ObmcSubpelVarianceParams(4, 3, &aom_highbd_12_obmc_sub_pixel_variance16x8_c,
2663 12),
2664 ObmcSubpelVarianceParams(3, 4, &aom_highbd_12_obmc_sub_pixel_variance8x16_c,
2665 12),
2666 ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_c,
2667 12),
2668 ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_c,
2669 12),
2670 ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_c,
2671 12),
2672 ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_c,
2673 12),
2674
// 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 for 8-, 10- and 12-bit.
2675 ObmcSubpelVarianceParams(6, 4, &aom_highbd_8_obmc_sub_pixel_variance64x16_c,
2676 8),
2677 ObmcSubpelVarianceParams(4, 6, &aom_highbd_8_obmc_sub_pixel_variance16x64_c,
2678 8),
2679 ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_c,
2680 8),
2681 ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_c,
2682 8),
2683 ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_c,
2684 8),
2685 ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_c,
2686 8),
2687 ObmcSubpelVarianceParams(6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_c,
2688 10),
2689 ObmcSubpelVarianceParams(4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_c,
2690 10),
2691 ObmcSubpelVarianceParams(5, 3, &aom_highbd_10_obmc_sub_pixel_variance32x8_c,
2692 10),
2693 ObmcSubpelVarianceParams(3, 5, &aom_highbd_10_obmc_sub_pixel_variance8x32_c,
2694 10),
2695 ObmcSubpelVarianceParams(4, 2, &aom_highbd_10_obmc_sub_pixel_variance16x4_c,
2696 10),
2697 ObmcSubpelVarianceParams(2, 4, &aom_highbd_10_obmc_sub_pixel_variance4x16_c,
2698 10),
2699 ObmcSubpelVarianceParams(6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_c,
2700 12),
2701 ObmcSubpelVarianceParams(4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_c,
2702 12),
2703 ObmcSubpelVarianceParams(5, 3, &aom_highbd_12_obmc_sub_pixel_variance32x8_c,
2704 12),
2705 ObmcSubpelVarianceParams(3, 5, &aom_highbd_12_obmc_sub_pixel_variance8x32_c,
2706 12),
2707 ObmcSubpelVarianceParams(4, 2, &aom_highbd_12_obmc_sub_pixel_variance16x4_c,
2708 12),
2709 ObmcSubpelVarianceParams(2, 4, &aom_highbd_12_obmc_sub_pixel_variance4x16_c,
2710 12),
2711 };
// Feed the whole table to AvxHBDObmcSubpelVarianceTest under the prefix "C".
2712 INSTANTIATE_TEST_SUITE_P(C, AvxHBDObmcSubpelVarianceTest,
2713 ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_c));
2714 #endif // !CONFIG_REALTIME_ONLY
2715 #endif // CONFIG_AV1_HIGHBITDEPTH
2716
2717 #if HAVE_SSE2
// Registers aom_mse_wxh_16bit_sse2 with MseWxHTest under the prefix "SSE2".
// The same function is used for all four parameter sets; only the first two
// integer arguments vary, over (3,3), (3,2), (2,3) and (2,2). The last
// argument is 8 in every entry.
// NOTE(review): if the integers are log2 dimensions as in the variance
// tables, this covers 8x8, 8x4, 4x8 and 4x4 - confirm against MseWxHParams.
2718 INSTANTIATE_TEST_SUITE_P(
2719 SSE2, MseWxHTest,
2720 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_sse2, 8),
2721 MseWxHParams(3, 2, &aom_mse_wxh_16bit_sse2, 8),
2722 MseWxHParams(2, 3, &aom_mse_wxh_16bit_sse2, 8),
2723 MseWxHParams(2, 2, &aom_mse_wxh_16bit_sse2, 8)));
2724
// Registers aom_mse_16xh_16bit_sse2 with Mse16xHTest under the prefix
// "SSE2". The same function is used for all four parameter sets; only the
// first two integer arguments vary, over (3,3), (3,2), (2,3) and (2,2). The
// last argument is 8 in every entry.
// NOTE(review): how Mse16xHTest interprets the two integers is not visible
// in this chunk - confirm against the Mse16xHParams definition.
2725 INSTANTIATE_TEST_SUITE_P(
2726 SSE2, Mse16xHTest,
2727 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_sse2, 8),
2728 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_sse2, 8),
2729 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_sse2, 8),
2730 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_sse2, 8)));
2731
// Registers SumOfSquaresTest under the prefix "SSE2" with a single
// parameter: the function aom_get_mb_ss_sse2 itself (no params wrapper).
2732 INSTANTIATE_TEST_SUITE_P(SSE2, SumOfSquaresTest,
2733 ::testing::Values(aom_get_mb_ss_sse2));
2734
// Registers the SSE2 MSE functions for 16x16, 16x8, 8x16 and 8x8 with
// AvxMseTest under the prefix "SSE2". The two integers in each entry equal
// log2 of the width/height in the function name. No bit-depth argument is
// passed here, unlike the high-bit-depth MseParams entries elsewhere in the
// file.
2735 INSTANTIATE_TEST_SUITE_P(SSE2, AvxMseTest,
2736 ::testing::Values(MseParams(4, 4, &aom_mse16x16_sse2),
2737 MseParams(4, 3, &aom_mse16x8_sse2),
2738 MseParams(3, 4, &aom_mse8x16_sse2),
2739 MseParams(3, 3, &aom_mse8x8_sse2)));
2740
// Parameter table for the SSE2 variance functions, followed by its
// registration with AvxVarianceTest under the prefix "SSE2".
// Each entry passes two integers that equal log2 of the width/height in the
// function name (e.g. 7, 6 <-> 128x64) and the function pointer; no
// bit-depth argument is given.
2741 const VarianceParams kArrayVariance_sse2[] = {
2742 VarianceParams(7, 7, &aom_variance128x128_sse2),
2743 VarianceParams(7, 6, &aom_variance128x64_sse2),
2744 VarianceParams(6, 7, &aom_variance64x128_sse2),
2745 VarianceParams(6, 6, &aom_variance64x64_sse2),
2746 VarianceParams(6, 5, &aom_variance64x32_sse2),
2747 VarianceParams(5, 6, &aom_variance32x64_sse2),
2748 VarianceParams(5, 5, &aom_variance32x32_sse2),
2749 VarianceParams(5, 4, &aom_variance32x16_sse2),
2750 VarianceParams(4, 5, &aom_variance16x32_sse2),
2751 VarianceParams(4, 4, &aom_variance16x16_sse2),
2752 VarianceParams(4, 3, &aom_variance16x8_sse2),
2753 VarianceParams(3, 4, &aom_variance8x16_sse2),
2754 VarianceParams(3, 3, &aom_variance8x8_sse2),
2755 VarianceParams(3, 2, &aom_variance8x4_sse2),
2756 VarianceParams(2, 3, &aom_variance4x8_sse2),
2757 VarianceParams(2, 2, &aom_variance4x4_sse2),
// Extra sizes (64x16, 32x8, 16x64, 16x4, 8x32, 4x16), compiled only when
// CONFIG_REALTIME_ONLY is 0.
2758 #if !CONFIG_REALTIME_ONLY
2759 VarianceParams(6, 4, &aom_variance64x16_sse2),
2760 VarianceParams(5, 3, &aom_variance32x8_sse2),
2761 VarianceParams(4, 6, &aom_variance16x64_sse2),
2762 VarianceParams(4, 2, &aom_variance16x4_sse2),
2763 VarianceParams(3, 5, &aom_variance8x32_sse2),
2764 VarianceParams(2, 4, &aom_variance4x16_sse2),
2765 #endif
2766 };
2767 INSTANTIATE_TEST_SUITE_P(SSE2, AvxVarianceTest,
2768 ::testing::ValuesIn(kArrayVariance_sse2));
2769
// Parameter table for aom_get_var_sse_sum_8x8_quad_sse2, followed by its
// registration with GetSseSum8x8QuadTest under the prefix "SSE2".
// All four entries use the same function; only the first two integers vary,
// over (7,7), (6,6), (5,5) and (5,4). The last argument is 0 in every entry.
// NOTE(review): presumably the integers are log2 block dimensions and the
// trailing 0 is a bit-depth slot, as in the other tables - confirm against
// the GetSseSumParams definition, which is outside this chunk.
2770 const GetSseSumParams kArrayGetSseSum8x8Quad_sse2[] = {
2771 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2772 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2773 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_sse2, 0),
2774 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_sse2, 0)
2775 };
2776 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum8x8QuadTest,
2777 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_sse2));
2778
// Parameter table for aom_get_var_sse_sum_16x16_dual_sse2, followed by its
// registration with GetSseSum16x16DualTest under the prefix "SSE2".
// All four entries use the same function; only the first two integers vary,
// over (7,7), (6,6), (5,5) and (5,4). The last argument is 0 in every entry.
// NOTE(review): presumably the integers are log2 block dimensions and the
// trailing 0 is a bit-depth slot, as in the other tables - confirm against
// the GetSseSumParamsDual definition, which is outside this chunk.
2779 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_sse2[] = {
2780 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2781 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2782 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_sse2, 0),
2783 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_sse2, 0)
2784 };
2785 INSTANTIATE_TEST_SUITE_P(SSE2, GetSseSum16x16DualTest,
2786 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_sse2));
2787
2788 #if CONFIG_AV1_HIGHBITDEPTH
2789 #if HAVE_SSE2
// Registers aom_mse_wxh_16bit_highbd_sse2 with MseHBDWxHTest under the
// prefix "SSE2". The same function is used for all four parameter sets; the
// first two integers take the values (3,3), (3,2), (2,3) and (2,2), and the
// last argument is 10 in every entry.
// NOTE(review): the `#if HAVE_SSE2` directly above this statement sits
// inside an earlier `#if HAVE_SSE2` that has not been closed at this point,
// so the inner guard looks redundant - confirm before removing it.
2790 INSTANTIATE_TEST_SUITE_P(
2791 SSE2, MseHBDWxHTest,
2792 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2793 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_sse2, 10),
2794 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_sse2, 10),
2795 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_sse2,
2796 10)));
2797 #endif // HAVE_SSE2
2798 #if HAVE_SSE4_1
// Registers the SSE4.1 4x4 high-bit-depth sub-pixel variance functions for
// 8-, 10- and 12-bit under the prefix "SSE4_1". Every entry passes (2, 2),
// which equals log2 of the 4x4 size in the function name.
// NOTE(review): these aom_highbd_* functions are attached to
// AvxSubpelVarianceTest, whereas the C and AVX2 high-bit-depth sub-pixel
// variance tables in this file use AvxHBDSubpelVarianceTest - confirm the
// choice of suite is intentional.
2799 INSTANTIATE_TEST_SUITE_P(
2800 SSE4_1, AvxSubpelVarianceTest,
2801 ::testing::Values(
2802 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_sse4_1,
2803 8),
2804 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_sse4_1,
2805 10),
2806 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_sse4_1,
2807 12)));
2808
// Registers the SSE4.1 4x4 high-bit-depth sub-pixel average variance
// functions for 8-, 10- and 12-bit under the prefix "SSE4_1". Every entry
// passes (2, 2), which equals log2 of the 4x4 size in the function name.
// NOTE(review): these aom_highbd_* functions are attached to
// AvxSubpelAvgVarianceTest, whereas the C high-bit-depth table in this file
// uses AvxHBDSubpelAvgVarianceTest - confirm the choice of suite is
// intentional.
2809 INSTANTIATE_TEST_SUITE_P(
2810 SSE4_1, AvxSubpelAvgVarianceTest,
2811 ::testing::Values(
2812 SubpelAvgVarianceParams(2, 2,
2813 &aom_highbd_8_sub_pixel_avg_variance4x4_sse4_1,
2814 8),
2815 SubpelAvgVarianceParams(2, 2,
2816 &aom_highbd_10_sub_pixel_avg_variance4x4_sse4_1,
2817 10),
2818 SubpelAvgVarianceParams(2, 2,
2819 &aom_highbd_12_sub_pixel_avg_variance4x4_sse4_1,
2820 12)));
2821 #endif // HAVE_SSE4_1
2822
// Registers the SSE2 high-bit-depth MSE functions with AvxHBDMseTest under
// the prefix "SSE2": 16x16 and 8x8 for 12-, 10- and 8-bit, in that order.
// The two integers equal log2 of the width/height in the function name and
// the last argument equals the bit depth in the function name.
2823 INSTANTIATE_TEST_SUITE_P(
2824 SSE2, AvxHBDMseTest,
2825 ::testing::Values(MseParams(4, 4, &aom_highbd_12_mse16x16_sse2, 12),
2826 MseParams(3, 3, &aom_highbd_12_mse8x8_sse2, 12),
2827 MseParams(4, 4, &aom_highbd_10_mse16x16_sse2, 10),
2828 MseParams(3, 3, &aom_highbd_10_mse8x8_sse2, 10),
2829 MseParams(4, 4, &aom_highbd_8_mse16x16_sse2, 8),
2830 MseParams(3, 3, &aom_highbd_8_mse8x8_sse2, 8)));
2831
// Parameter table for the SSE2 high-bit-depth variance functions, followed
// by its registration with AvxHBDVarianceTest under the prefix "SSE2".
// Each entry passes two integers that equal log2 of the width/height in the
// function name, the function pointer, and a bit depth equal to the 12/10/8
// in the function name. The smallest size listed is 8x8; no 8x4, 4x8 or 4x4
// entries appear in this table.
2832 const VarianceParams kArrayHBDVariance_sse2[] = {
// 12-bit functions, 128x128 down to 8x8.
2833 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sse2, 12),
2834 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sse2, 12),
2835 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sse2, 12),
2836 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sse2, 12),
2837 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sse2, 12),
2838 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sse2, 12),
2839 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sse2, 12),
2840 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sse2, 12),
2841 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sse2, 12),
2842 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sse2, 12),
2843 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sse2, 12),
2844 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sse2, 12),
2845 VarianceParams(3, 3, &aom_highbd_12_variance8x8_sse2, 12),
// 10-bit functions, same sizes in the same order.
2846 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sse2, 10),
2847 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sse2, 10),
2848 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sse2, 10),
2849 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sse2, 10),
2850 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sse2, 10),
2851 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sse2, 10),
2852 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sse2, 10),
2853 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sse2, 10),
2854 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sse2, 10),
2855 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sse2, 10),
2856 VarianceParams(4, 3, &aom_highbd_10_variance16x8_sse2, 10),
2857 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sse2, 10),
2858 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sse2, 10),
// 8-bit functions, same sizes in the same order.
2859 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sse2, 8),
2860 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sse2, 8),
2861 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sse2, 8),
2862 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sse2, 8),
2863 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sse2, 8),
2864 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sse2, 8),
2865 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sse2, 8),
2866 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sse2, 8),
2867 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sse2, 8),
2868 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sse2, 8),
2869 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sse2, 8),
2870 VarianceParams(3, 4, &aom_highbd_8_variance8x16_sse2, 8),
2871 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sse2, 8),
// Extra sizes (64x16, 16x64, 32x8, 8x32) for each bit depth, compiled only
// when CONFIG_REALTIME_ONLY is 0. The 16x4 and 4x16 rows are left commented
// out for every bit depth; the reason is not stated in this part of the file.
2872 #if !CONFIG_REALTIME_ONLY
2873 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sse2, 12),
2874 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sse2, 12),
2875 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sse2, 12),
2876 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sse2, 12),
2877 // VarianceParams(4, 2, &aom_highbd_12_variance16x4_sse2, 12),
2878 // VarianceParams(2, 4, &aom_highbd_12_variance4x16_sse2, 12),
2879 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sse2, 10),
2880 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sse2, 10),
2881 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sse2, 10),
2882 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sse2, 10),
2883 // VarianceParams(4, 2, &aom_highbd_10_variance16x4_sse2, 10),
2884 // VarianceParams(2, 4, &aom_highbd_10_variance4x16_sse2, 10),
2885 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sse2, 8),
2886 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sse2, 8),
2887 VarianceParams(5, 3, &aom_highbd_8_variance32x8_sse2, 8),
2888 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sse2, 8),
2889 // VarianceParams(4, 2, &aom_highbd_8_variance16x4_sse2, 8),
2890 // VarianceParams(2, 4, &aom_highbd_8_variance4x16_sse2, 8),
2891 #endif
2892 };
2893 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDVarianceTest,
2894 ::testing::ValuesIn(kArrayHBDVariance_sse2));
2895
2896 #if HAVE_AVX2
2897
// Registers aom_mse_wxh_16bit_highbd_avx2 with MseHBDWxHTest under the
// prefix "AVX2". The same function is used for all four parameter sets; the
// first two integers take the values (3,3), (3,2), (2,3) and (2,2), and the
// last argument is 10 in every entry - the same parameter sets as the SSE2
// instantiation of this suite.
2898 INSTANTIATE_TEST_SUITE_P(
2899 AVX2, MseHBDWxHTest,
2900 ::testing::Values(MseHBDWxHParams(3, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2901 MseHBDWxHParams(3, 2, &aom_mse_wxh_16bit_highbd_avx2, 10),
2902 MseHBDWxHParams(2, 3, &aom_mse_wxh_16bit_highbd_avx2, 10),
2903 MseHBDWxHParams(2, 2, &aom_mse_wxh_16bit_highbd_avx2,
2904 10)));
2905
// Parameter table for the AVX2 high-bit-depth variance functions, followed
// by its registration with AvxHBDVarianceTest under the prefix "AVX2".
// Only 10-bit functions are listed (every entry is aom_highbd_10_* with a
// bit depth of 10), for sizes 128x128 down to 8x8. The two integers equal
// log2 of the width/height in the function name.
2906 const VarianceParams kArrayHBDVariance_avx2[] = {
2907 VarianceParams(7, 7, &aom_highbd_10_variance128x128_avx2, 10),
2908 VarianceParams(7, 6, &aom_highbd_10_variance128x64_avx2, 10),
2909 VarianceParams(6, 7, &aom_highbd_10_variance64x128_avx2, 10),
2910 VarianceParams(6, 6, &aom_highbd_10_variance64x64_avx2, 10),
2911 VarianceParams(6, 5, &aom_highbd_10_variance64x32_avx2, 10),
2912 VarianceParams(5, 6, &aom_highbd_10_variance32x64_avx2, 10),
2913 VarianceParams(5, 5, &aom_highbd_10_variance32x32_avx2, 10),
2914 VarianceParams(5, 4, &aom_highbd_10_variance32x16_avx2, 10),
2915 VarianceParams(4, 5, &aom_highbd_10_variance16x32_avx2, 10),
2916 VarianceParams(4, 4, &aom_highbd_10_variance16x16_avx2, 10),
2917 VarianceParams(4, 3, &aom_highbd_10_variance16x8_avx2, 10),
2918 VarianceParams(3, 4, &aom_highbd_10_variance8x16_avx2, 10),
2919 VarianceParams(3, 3, &aom_highbd_10_variance8x8_avx2, 10),
// Extra sizes (64x16, 32x8, 16x64, 8x32), compiled only when
// CONFIG_REALTIME_ONLY is 0.
2920 #if !CONFIG_REALTIME_ONLY
2921 VarianceParams(6, 4, &aom_highbd_10_variance64x16_avx2, 10),
2922 VarianceParams(5, 3, &aom_highbd_10_variance32x8_avx2, 10),
2923 VarianceParams(4, 6, &aom_highbd_10_variance16x64_avx2, 10),
2924 VarianceParams(3, 5, &aom_highbd_10_variance8x32_avx2, 10),
2925 #endif
2926 };
2927
2928 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDVarianceTest,
2929 ::testing::ValuesIn(kArrayHBDVariance_avx2));
2930
// Parameter table for the AVX2 high-bit-depth sub-pixel variance functions,
// followed by its registration with AvxHBDSubpelVarianceTest under the
// prefix "AVX2".
// Only 10-bit functions are listed, for sizes 128x128 down to 8x8, and this
// table has no CONFIG_REALTIME_ONLY-guarded extra sizes. The two integers
// equal log2 of the width/height in the function name.
2931 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
2932 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_avx2, 10),
2933 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_avx2, 10),
2934 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_avx2, 10),
2935 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_avx2, 10),
2936 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_avx2, 10),
2937 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_avx2, 10),
2938 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_avx2, 10),
2939 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_avx2, 10),
2940 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_avx2, 10),
2941 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_avx2, 10),
2942 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_avx2, 10),
2943 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_avx2, 10),
2944 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_avx2, 10),
2945 };
2946
2947 INSTANTIATE_TEST_SUITE_P(AVX2, AvxHBDSubpelVarianceTest,
2948 ::testing::ValuesIn(kArrayHBDSubpelVariance_avx2));
2949 #endif // HAVE_AVX2
2950
2951 const SubpelVarianceParams kArrayHBDSubpelVariance_sse2[] = {
2952 SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_sse2, 12),
2953 SubpelVarianceParams(7, 6, &aom_highbd_12_sub_pixel_variance128x64_sse2, 12),
2954 SubpelVarianceParams(6, 7, &aom_highbd_12_sub_pixel_variance64x128_sse2, 12),
2955 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_sse2, 12),
2956 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_sse2, 12),
2957 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_sse2, 12),
2958 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
2959 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
2960 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
2961 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
2962 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
2963 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
2964 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
2965 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
2966 SubpelVarianceParams(7, 7, &aom_highbd_10_sub_pixel_variance128x128_sse2, 10),
2967 SubpelVarianceParams(7, 6, &aom_highbd_10_sub_pixel_variance128x64_sse2, 10),
2968 SubpelVarianceParams(6, 7, &aom_highbd_10_sub_pixel_variance64x128_sse2, 10),
2969 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_sse2, 10),
2970 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_sse2, 10),
2971 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_sse2, 10),
2972 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
2973 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
2974 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
2975 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
2976 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
2977 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
2978 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
2979 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
2980 SubpelVarianceParams(7, 7, &aom_highbd_8_sub_pixel_variance128x128_sse2, 8),
2981 SubpelVarianceParams(7, 6, &aom_highbd_8_sub_pixel_variance128x64_sse2, 8),
2982 SubpelVarianceParams(6, 7, &aom_highbd_8_sub_pixel_variance64x128_sse2, 8),
2983 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_sse2, 8),
2984 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_sse2, 8),
2985 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_sse2, 8),
2986 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
2987 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
2988 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
2989 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
2990 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
2991 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
2992 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
2993 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
2994 #if !CONFIG_REALTIME_ONLY
2995 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_sse2, 12),
2996 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
2997 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
2998 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
2999 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
3000 // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
3001 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
3002 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
3003 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
3004 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
3005 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
3006 // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
3007 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
3008 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
3009 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
3010 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
3011 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
3012 // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
3013 #endif
3014 };
// Instantiates AvxHBDSubpelVarianceTest once per entry of
// kArrayHBDSubpelVariance_sse2, under the "SSE2" instantiation name.
3015 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
3016 ::testing::ValuesIn(kArrayHBDSubpelVariance_sse2));
3017
// Parameter table for the SSE2 high-bitdepth sub-pixel average variance
// kernels. In every entry the first two arguments equal log2 of the block
// width and height in the function name (e.g. 6, 5 -> 64x32), and the last
// argument equals the bit depth in the function name (12, 10 or 8).
3018 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_sse2[] = {
3019 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_sse2,
3020 12),
3021 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_sse2,
3022 12),
3023 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_sse2,
3024 12),
3025 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_sse2,
3026 12),
3027 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_sse2,
3028 12),
3029 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
3030 12),
3031 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
3032 12),
3033 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
3034 12),
3035 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
3036 12),
3037 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
3038 12),
3039 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_sse2,
3040 12),
3041 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_sse2,
3042 10),
3043 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_sse2,
3044 10),
3045 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_sse2,
3046 10),
3047 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_sse2,
3048 10),
3049 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_sse2,
3050 10),
3051 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
3052 10),
3053 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
3054 10),
3055 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
3056 10),
3057 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
3058 10),
3059 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
3060 10),
3061 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_sse2,
3062 10),
3063 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_sse2,
3064 8),
3065 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_sse2,
3066 8),
3067 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_sse2,
3068 8),
3069 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_sse2,
3070 8),
3071 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_sse2,
3072 8),
3073 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
3074 8),
3075 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
3076 8),
3077 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
3078 8),
3079 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
3080 8),
3081 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
3082 8),
3083 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_sse2,
3084 8),
3085
// The 4:1 / 1:4 block sizes below are compiled in only when
// CONFIG_REALTIME_ONLY is 0.
// NOTE(review): the 4x16 entries are commented out at every bit depth; the
// reason is not visible in this section.
3086 #if !CONFIG_REALTIME_ONLY
3087 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_sse2,
3088 12),
3089 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_sse2,
3090 12),
3091 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_sse2,
3092 12),
3093 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
3094 12),
3095 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
3096 12),
3097 // SubpelAvgVarianceParams(2, 4,
3098 // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
3099 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
3100 10),
3101 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_sse2,
3102 10),
3103 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_sse2,
3104 10),
3105 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
3106 10),
3107 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
3108 10),
3109 // SubpelAvgVarianceParams(2, 4,
3110 // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
3111 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
3112 8),
3113 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_sse2,
3114 8),
3115 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_sse2,
3116 8),
3117 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
3118 8),
3119 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
3120 8),
3121 // SubpelAvgVarianceParams(2, 4,
3122 // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
3123 #endif
3124 };
3125
// One AvxHBDSubpelAvgVarianceTest instance per table entry.
3126 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelAvgVarianceTest,
3127 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_sse2));
3128 #endif // HAVE_SSE2
3129 #endif // CONFIG_AV1_HIGHBITDEPTH
3130
3131 #if HAVE_SSSE3
// Parameter table for the SSSE3 sub-pixel variance kernels. The first two
// arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 7, 6 -> 128x64).
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelVarianceParams.
3132 const SubpelVarianceParams kArraySubpelVariance_ssse3[] = {
3133 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_ssse3, 0),
3134 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_ssse3, 0),
3135 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_ssse3, 0),
3136 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_ssse3, 0),
3137 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_ssse3, 0),
3138 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_ssse3, 0),
3139 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_ssse3, 0),
3140 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_ssse3, 0),
3141 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_ssse3, 0),
3142 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_ssse3, 0),
3143 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_ssse3, 0),
3144 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_ssse3, 0),
3145 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_ssse3, 0),
3146 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_ssse3, 0),
3147 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_ssse3, 0),
3148 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_ssse3, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3149 #if !CONFIG_REALTIME_ONLY
3150 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_ssse3, 0),
3151 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_ssse3, 0),
3152 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_ssse3, 0),
3153 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_ssse3, 0),
3154 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_ssse3, 0),
3155 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_ssse3, 0),
3156 #endif
3157 };
// One AvxSubpelVarianceTest instance per table entry.
3158 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelVarianceTest,
3159 ::testing::ValuesIn(kArraySubpelVariance_ssse3));
3160
// Parameter table for the SSSE3 sub-pixel average variance kernels. The first
// two arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 6, 7 -> 64x128).
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelAvgVarianceParams.
3161 const SubpelAvgVarianceParams kArraySubpelAvgVariance_ssse3[] = {
3162 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_ssse3, 0),
3163 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_ssse3, 0),
3164 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_ssse3, 0),
3165 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_ssse3, 0),
3166 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_ssse3, 0),
3167 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_ssse3, 0),
3168 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_ssse3, 0),
3169 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_ssse3, 0),
3170 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_ssse3, 0),
3171 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_ssse3, 0),
3172 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_ssse3, 0),
3173 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_ssse3, 0),
3174 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_ssse3, 0),
3175 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_ssse3, 0),
3176 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_ssse3, 0),
3177 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_ssse3, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3178 #if !CONFIG_REALTIME_ONLY
3179 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_ssse3, 0),
3180 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_ssse3, 0),
3181 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_ssse3, 0),
3182 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_ssse3, 0),
3183 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_ssse3, 0),
3184 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_ssse3, 0),
3185 #endif
3186 };
// One AvxSubpelAvgVarianceTest instance per table entry.
3187 INSTANTIATE_TEST_SUITE_P(SSSE3, AvxSubpelAvgVarianceTest,
3188 ::testing::ValuesIn(kArraySubpelAvgVariance_ssse3));
3189
// Parameter table for the SSSE3 distance-weighted sub-pixel average variance
// kernels. The first two arguments of each entry equal log2 of the block
// width and height in the function name (e.g. 5, 4 -> 32x16).
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against DistWtdSubpelAvgVarianceParams.
3190 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_ssse3[] = {
3191 DistWtdSubpelAvgVarianceParams(
3192 7, 7, &aom_dist_wtd_sub_pixel_avg_variance128x128_ssse3, 0),
3193 DistWtdSubpelAvgVarianceParams(
3194 7, 6, &aom_dist_wtd_sub_pixel_avg_variance128x64_ssse3, 0),
3195 DistWtdSubpelAvgVarianceParams(
3196 6, 7, &aom_dist_wtd_sub_pixel_avg_variance64x128_ssse3, 0),
3197 DistWtdSubpelAvgVarianceParams(
3198 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_ssse3, 0),
3199 DistWtdSubpelAvgVarianceParams(
3200 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_ssse3, 0),
3201 DistWtdSubpelAvgVarianceParams(
3202 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_ssse3, 0),
3203 DistWtdSubpelAvgVarianceParams(
3204 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_ssse3, 0),
3205 DistWtdSubpelAvgVarianceParams(
3206 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_ssse3, 0),
3207 DistWtdSubpelAvgVarianceParams(
3208 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_ssse3, 0),
3209 DistWtdSubpelAvgVarianceParams(
3210 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_ssse3, 0),
3211 DistWtdSubpelAvgVarianceParams(
3212 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_ssse3, 0),
3213 DistWtdSubpelAvgVarianceParams(
3214 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_ssse3, 0),
3215 DistWtdSubpelAvgVarianceParams(
3216 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_ssse3, 0),
3217 DistWtdSubpelAvgVarianceParams(
3218 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_ssse3, 0),
3219 DistWtdSubpelAvgVarianceParams(
3220 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_ssse3, 0),
3221 DistWtdSubpelAvgVarianceParams(
3222 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_ssse3, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3223 #if !CONFIG_REALTIME_ONLY
3224 DistWtdSubpelAvgVarianceParams(
3225 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_ssse3, 0),
3226 DistWtdSubpelAvgVarianceParams(
3227 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_ssse3, 0),
3228 DistWtdSubpelAvgVarianceParams(
3229 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_ssse3, 0),
3230 DistWtdSubpelAvgVarianceParams(
3231 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_ssse3, 0),
3232 DistWtdSubpelAvgVarianceParams(
3233 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_ssse3, 0),
3234 DistWtdSubpelAvgVarianceParams(
3235 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_ssse3, 0),
3236 #endif
3237 };
// One AvxDistWtdSubpelAvgVarianceTest instance per table entry.
3238 INSTANTIATE_TEST_SUITE_P(
3239 SSSE3, AvxDistWtdSubpelAvgVarianceTest,
3240 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_ssse3));
3241 #endif // HAVE_SSSE3
3242
3243 #if HAVE_SSE4_1
3244 #if !CONFIG_REALTIME_ONLY
// Instantiates AvxObmcSubpelVarianceTest for the SSE4.1 OBMC sub-pixel
// variance kernels, with the parameters listed inline rather than in a named
// array. The first two arguments of each entry equal log2 of the block width
// and height in the function name (e.g. 7, 6 -> 128x64). The enclosing
// #if !CONFIG_REALTIME_ONLY removes the whole instantiation from
// realtime-only builds.
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against ObmcSubpelVarianceParams.
3245 INSTANTIATE_TEST_SUITE_P(
3246 SSE4_1, AvxObmcSubpelVarianceTest,
3247 ::testing::Values(
3248 ObmcSubpelVarianceParams(7, 7,
3249 &aom_obmc_sub_pixel_variance128x128_sse4_1, 0),
3250 ObmcSubpelVarianceParams(7, 6,
3251 &aom_obmc_sub_pixel_variance128x64_sse4_1, 0),
3252 ObmcSubpelVarianceParams(6, 7,
3253 &aom_obmc_sub_pixel_variance64x128_sse4_1, 0),
3254 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_sse4_1,
3255 0),
3256 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_sse4_1,
3257 0),
3258 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_sse4_1,
3259 0),
3260 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_sse4_1,
3261 0),
3262 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_sse4_1,
3263 0),
3264 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_sse4_1,
3265 0),
3266 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_sse4_1,
3267 0),
3268 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_sse4_1,
3269 0),
3270 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_sse4_1,
3271 0),
3272 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_sse4_1,
3273 0),
3274 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_sse4_1,
3275 0),
3276 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_sse4_1,
3277 0),
3278 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_sse4_1,
3279 0),
3280 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_sse4_1,
3281 0),
3282 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_sse4_1,
3283 0),
3284 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_sse4_1,
3285 0),
3286 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_sse4_1,
3287 0),
3288 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_sse4_1,
3289 0),
3290 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_sse4_1,
3291 0)));
3292 #endif
3293 #endif // HAVE_SSE4_1
3294
3295 #if HAVE_AVX2
3296
// Instantiates MseWxHTest four times against the single kernel
// aom_mse_wxh_16bit_avx2; only the first two arguments vary (3/2 in each
// combination).
// NOTE(review): those arguments are presumably log2 width/height (8x8, 8x4,
// 4x8, 4x4) and the trailing 8 a bit depth — confirm against MseWxHParams.
3297 INSTANTIATE_TEST_SUITE_P(
3298 AVX2, MseWxHTest,
3299 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_avx2, 8),
3300 MseWxHParams(3, 2, &aom_mse_wxh_16bit_avx2, 8),
3301 MseWxHParams(2, 3, &aom_mse_wxh_16bit_avx2, 8),
3302 MseWxHParams(2, 2, &aom_mse_wxh_16bit_avx2, 8)));
3303
// Instantiates Mse16xHTest four times against the single kernel
// aom_mse_16xh_16bit_avx2; only the first two arguments vary (3/2 in each
// combination).
// NOTE(review): those arguments are presumably log2 width/height and the
// trailing 8 a bit depth — confirm against Mse16xHParams.
3304 INSTANTIATE_TEST_SUITE_P(
3305 AVX2, Mse16xHTest,
3306 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_avx2, 8),
3307 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_avx2, 8),
3308 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_avx2, 8),
3309 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_avx2, 8)));
3310
// Instantiates AvxMseTest for the one AVX2 MSE kernel listed here, 16x16
// (arguments 4, 4 = log2 of the dimensions in the function name).
3311 INSTANTIATE_TEST_SUITE_P(AVX2, AvxMseTest,
3312 ::testing::Values(MseParams(4, 4,
3313 &aom_mse16x16_avx2)));
3314
// Parameter table for the AVX2 variance kernels. The two integer arguments of
// each entry equal log2 of the block width and height in the function name
// (e.g. 7, 6 -> 128x64). Only block widths of 16 and above are listed.
3315 const VarianceParams kArrayVariance_avx2[] = {
3316 VarianceParams(7, 7, &aom_variance128x128_avx2),
3317 VarianceParams(7, 6, &aom_variance128x64_avx2),
3318 VarianceParams(6, 7, &aom_variance64x128_avx2),
3319 VarianceParams(6, 6, &aom_variance64x64_avx2),
3320 VarianceParams(6, 5, &aom_variance64x32_avx2),
3321 VarianceParams(5, 6, &aom_variance32x64_avx2),
3322 VarianceParams(5, 5, &aom_variance32x32_avx2),
3323 VarianceParams(5, 4, &aom_variance32x16_avx2),
3324 VarianceParams(4, 5, &aom_variance16x32_avx2),
3325 VarianceParams(4, 4, &aom_variance16x16_avx2),
3326 VarianceParams(4, 3, &aom_variance16x8_avx2),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3327 #if !CONFIG_REALTIME_ONLY
3328 VarianceParams(6, 4, &aom_variance64x16_avx2),
3329 VarianceParams(4, 6, &aom_variance16x64_avx2),
3330 VarianceParams(5, 3, &aom_variance32x8_avx2),
3331 VarianceParams(4, 2, &aom_variance16x4_avx2),
3332 #endif
3333 };
// One AvxVarianceTest instance per table entry.
3334 INSTANTIATE_TEST_SUITE_P(AVX2, AvxVarianceTest,
3335 ::testing::ValuesIn(kArrayVariance_avx2));
3336
// Parameter table for GetSseSum8x8QuadTest on AVX2. Every entry passes the
// same kernel, aom_get_var_sse_sum_8x8_quad_avx2; only the first two
// arguments differ.
// NOTE(review): those arguments are presumably log2 width/height of the
// tested region, and the trailing 0 a bit-depth slot — confirm against
// GetSseSumParams.
3337 const GetSseSumParams kArrayGetSseSum8x8Quad_avx2[] = {
3338 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3339 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3340 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_avx2, 0),
3341 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_avx2, 0)
3342 };
// One GetSseSum8x8QuadTest instance per table entry.
3343 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum8x8QuadTest,
3344 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_avx2));
3345
// Parameter table for GetSseSum16x16DualTest on AVX2. Every entry passes the
// same kernel, aom_get_var_sse_sum_16x16_dual_avx2; only the first two
// arguments differ.
// NOTE(review): those arguments are presumably log2 width/height of the
// tested region, and the trailing 0 a bit-depth slot — confirm against
// GetSseSumParamsDual.
3346 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_avx2[] = {
3347 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3348 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3349 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_avx2, 0),
3350 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_avx2, 0)
3351 };
// One GetSseSum16x16DualTest instance per table entry.
3352 INSTANTIATE_TEST_SUITE_P(AVX2, GetSseSum16x16DualTest,
3353 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_avx2));
3354
// Parameter table for the AVX2 sub-pixel variance kernels. The first two
// arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 5, 6 -> 32x64). Only block widths of 16 and above are
// listed.
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelVarianceParams.
3355 const SubpelVarianceParams kArraySubpelVariance_avx2[] = {
3356 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_avx2, 0),
3357 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_avx2, 0),
3358 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_avx2, 0),
3359 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_avx2, 0),
3360 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_avx2, 0),
3361 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_avx2, 0),
3362 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_avx2, 0),
3363 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_avx2, 0),
3364
3365 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_avx2, 0),
3366 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_avx2, 0),
3367 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_avx2, 0),
// 16x64 and 16x4 are compiled in only when CONFIG_REALTIME_ONLY is 0.
3368 #if !CONFIG_REALTIME_ONLY
3369 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_avx2, 0),
3370 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_avx2, 0),
3371 #endif
3372 };
// One AvxSubpelVarianceTest instance per table entry.
3373 INSTANTIATE_TEST_SUITE_P(AVX2, AvxSubpelVarianceTest,
3374 ::testing::ValuesIn(kArraySubpelVariance_avx2));
3375
// Instantiates AvxSubpelAvgVarianceTest for the AVX2 sub-pixel average
// variance kernels, listed inline. The first two arguments of each entry
// equal log2 of the block width and height in the function name; only sizes
// from 32x16 up to 128x128 are listed.
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelAvgVarianceParams.
3376 INSTANTIATE_TEST_SUITE_P(
3377 AVX2, AvxSubpelAvgVarianceTest,
3378 ::testing::Values(
3379 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_avx2,
3380 0),
3381 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_avx2,
3382 0),
3383 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_avx2,
3384 0),
3385 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_avx2, 0),
3386 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_avx2, 0),
3387 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_avx2, 0),
3388 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_avx2, 0),
3389 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_avx2,
3390 0)));
3391 #endif // HAVE_AVX2
3392
3393 #if HAVE_NEON
// Instantiates MseWxHTest four times against the single kernel
// aom_mse_wxh_16bit_neon; only the first two arguments vary (3/2 in each
// combination).
// NOTE(review): those arguments are presumably log2 width/height (8x8, 8x4,
// 4x8, 4x4) and the trailing 8 a bit depth — confirm against MseWxHParams.
3394 INSTANTIATE_TEST_SUITE_P(
3395 NEON, MseWxHTest,
3396 ::testing::Values(MseWxHParams(3, 3, &aom_mse_wxh_16bit_neon, 8),
3397 MseWxHParams(3, 2, &aom_mse_wxh_16bit_neon, 8),
3398 MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
3399 MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
3400
// Instantiates Mse16xHTest four times against the single kernel
// aom_mse_16xh_16bit_neon; only the first two arguments vary (3/2 in each
// combination).
// NOTE(review): those arguments are presumably log2 width/height and the
// trailing 8 a bit depth — confirm against Mse16xHParams.
3401 INSTANTIATE_TEST_SUITE_P(
3402 NEON, Mse16xHTest,
3403 ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
3404 Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
3405 Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
3406 Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
3407
// Instantiates SumOfSquaresTest with a single parameter: the NEON kernel
// aom_get_mb_ss_neon.
3408 INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
3409 ::testing::Values(aom_get_mb_ss_neon));
3410
// Instantiates AvxMseTest for the four NEON MSE kernels (8x8, 8x16, 16x16,
// 16x8). The two integer arguments equal log2 of the block width and height
// in the function name.
3411 INSTANTIATE_TEST_SUITE_P(NEON, AvxMseTest,
3412 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon),
3413 MseParams(3, 4, &aom_mse8x16_neon),
3414 MseParams(4, 4, &aom_mse16x16_neon),
3415 MseParams(4, 3, &aom_mse16x8_neon)));
3416
// Parameter table for the NEON variance kernels; each block size is listed
// exactly once so no kernel/size pair is tested twice. The two integer
// arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 7, 6 -> 128x64).
3417 const VarianceParams kArrayVariance_neon[] = {
3418 VarianceParams(7, 7, &aom_variance128x128_neon),
3420 VarianceParams(7, 6, &aom_variance128x64_neon),
3421 VarianceParams(6, 7, &aom_variance64x128_neon),
3422 VarianceParams(6, 6, &aom_variance64x64_neon),
3423 VarianceParams(6, 5, &aom_variance64x32_neon),
3424 VarianceParams(5, 6, &aom_variance32x64_neon),
3425 VarianceParams(5, 5, &aom_variance32x32_neon),
3426 VarianceParams(5, 4, &aom_variance32x16_neon),
3427 VarianceParams(4, 5, &aom_variance16x32_neon),
3428 VarianceParams(4, 4, &aom_variance16x16_neon),
3429 VarianceParams(4, 3, &aom_variance16x8_neon),
3430 VarianceParams(3, 4, &aom_variance8x16_neon),
3431 VarianceParams(3, 3, &aom_variance8x8_neon),
3432 VarianceParams(3, 2, &aom_variance8x4_neon),
3433 VarianceParams(2, 3, &aom_variance4x8_neon),
3434 VarianceParams(2, 2, &aom_variance4x4_neon),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3435 #if !CONFIG_REALTIME_ONLY
3436 VarianceParams(2, 4, &aom_variance4x16_neon),
3437 VarianceParams(4, 2, &aom_variance16x4_neon),
3438 VarianceParams(3, 5, &aom_variance8x32_neon),
3439 VarianceParams(5, 3, &aom_variance32x8_neon),
3440 VarianceParams(4, 6, &aom_variance16x64_neon),
3441 VarianceParams(6, 4, &aom_variance64x16_neon),
3442 #endif
3443 };
3444
// One AvxVarianceTest instance per table entry.
3445 INSTANTIATE_TEST_SUITE_P(NEON, AvxVarianceTest,
3446 ::testing::ValuesIn(kArrayVariance_neon));
3447
// Parameter table for the NEON sub-pixel variance kernels. The first two
// arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 7, 6 -> 128x64).
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelVarianceParams.
3448 const SubpelVarianceParams kArraySubpelVariance_neon[] = {
3449 SubpelVarianceParams(7, 7, &aom_sub_pixel_variance128x128_neon, 0),
3450 SubpelVarianceParams(7, 6, &aom_sub_pixel_variance128x64_neon, 0),
3451 SubpelVarianceParams(6, 7, &aom_sub_pixel_variance64x128_neon, 0),
3452 SubpelVarianceParams(6, 6, &aom_sub_pixel_variance64x64_neon, 0),
3453 SubpelVarianceParams(6, 5, &aom_sub_pixel_variance64x32_neon, 0),
3454 SubpelVarianceParams(5, 6, &aom_sub_pixel_variance32x64_neon, 0),
3455 SubpelVarianceParams(5, 5, &aom_sub_pixel_variance32x32_neon, 0),
3456 SubpelVarianceParams(5, 4, &aom_sub_pixel_variance32x16_neon, 0),
3457 SubpelVarianceParams(4, 5, &aom_sub_pixel_variance16x32_neon, 0),
3458 SubpelVarianceParams(4, 4, &aom_sub_pixel_variance16x16_neon, 0),
3459 SubpelVarianceParams(4, 3, &aom_sub_pixel_variance16x8_neon, 0),
3460 SubpelVarianceParams(3, 4, &aom_sub_pixel_variance8x16_neon, 0),
3461 SubpelVarianceParams(3, 3, &aom_sub_pixel_variance8x8_neon, 0),
3462 SubpelVarianceParams(3, 2, &aom_sub_pixel_variance8x4_neon, 0),
3463 SubpelVarianceParams(2, 3, &aom_sub_pixel_variance4x8_neon, 0),
3464 SubpelVarianceParams(2, 2, &aom_sub_pixel_variance4x4_neon, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3465 #if !CONFIG_REALTIME_ONLY
3466 SubpelVarianceParams(6, 4, &aom_sub_pixel_variance64x16_neon, 0),
3467 SubpelVarianceParams(4, 6, &aom_sub_pixel_variance16x64_neon, 0),
3468 SubpelVarianceParams(5, 3, &aom_sub_pixel_variance32x8_neon, 0),
3469 SubpelVarianceParams(3, 5, &aom_sub_pixel_variance8x32_neon, 0),
3470 SubpelVarianceParams(4, 2, &aom_sub_pixel_variance16x4_neon, 0),
3471 SubpelVarianceParams(2, 4, &aom_sub_pixel_variance4x16_neon, 0),
3472 #endif
3473 };
// One AvxSubpelVarianceTest instance per table entry.
3474 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelVarianceTest,
3475 ::testing::ValuesIn(kArraySubpelVariance_neon));
3476
// Parameter table for the NEON sub-pixel average variance kernels. The first
// two arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 6, 7 -> 64x128).
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against SubpelAvgVarianceParams.
3477 const SubpelAvgVarianceParams kArraySubpelAvgVariance_neon[] = {
3478 SubpelAvgVarianceParams(7, 7, &aom_sub_pixel_avg_variance128x128_neon, 0),
3479 SubpelAvgVarianceParams(7, 6, &aom_sub_pixel_avg_variance128x64_neon, 0),
3480 SubpelAvgVarianceParams(6, 7, &aom_sub_pixel_avg_variance64x128_neon, 0),
3481 SubpelAvgVarianceParams(6, 6, &aom_sub_pixel_avg_variance64x64_neon, 0),
3482 SubpelAvgVarianceParams(6, 5, &aom_sub_pixel_avg_variance64x32_neon, 0),
3483 SubpelAvgVarianceParams(5, 6, &aom_sub_pixel_avg_variance32x64_neon, 0),
3484 SubpelAvgVarianceParams(5, 5, &aom_sub_pixel_avg_variance32x32_neon, 0),
3485 SubpelAvgVarianceParams(5, 4, &aom_sub_pixel_avg_variance32x16_neon, 0),
3486 SubpelAvgVarianceParams(4, 5, &aom_sub_pixel_avg_variance16x32_neon, 0),
3487 SubpelAvgVarianceParams(4, 4, &aom_sub_pixel_avg_variance16x16_neon, 0),
3488 SubpelAvgVarianceParams(4, 3, &aom_sub_pixel_avg_variance16x8_neon, 0),
3489 SubpelAvgVarianceParams(3, 4, &aom_sub_pixel_avg_variance8x16_neon, 0),
3490 SubpelAvgVarianceParams(3, 3, &aom_sub_pixel_avg_variance8x8_neon, 0),
3491 SubpelAvgVarianceParams(3, 2, &aom_sub_pixel_avg_variance8x4_neon, 0),
3492 SubpelAvgVarianceParams(2, 3, &aom_sub_pixel_avg_variance4x8_neon, 0),
3493 SubpelAvgVarianceParams(2, 2, &aom_sub_pixel_avg_variance4x4_neon, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3494 #if !CONFIG_REALTIME_ONLY
3495 SubpelAvgVarianceParams(6, 4, &aom_sub_pixel_avg_variance64x16_neon, 0),
3496 SubpelAvgVarianceParams(4, 6, &aom_sub_pixel_avg_variance16x64_neon, 0),
3497 SubpelAvgVarianceParams(5, 3, &aom_sub_pixel_avg_variance32x8_neon, 0),
3498 SubpelAvgVarianceParams(3, 5, &aom_sub_pixel_avg_variance8x32_neon, 0),
3499 SubpelAvgVarianceParams(4, 2, &aom_sub_pixel_avg_variance16x4_neon, 0),
3500 SubpelAvgVarianceParams(2, 4, &aom_sub_pixel_avg_variance4x16_neon, 0),
3501 #endif
3502 };
// One AvxSubpelAvgVarianceTest instance per table entry.
3503 INSTANTIATE_TEST_SUITE_P(NEON, AvxSubpelAvgVarianceTest,
3504 ::testing::ValuesIn(kArraySubpelAvgVariance_neon));
3505
// Parameter table for the NEON distance-weighted sub-pixel average variance
// kernels. The first two arguments of each entry equal log2 of the block
// width and height in the function name (e.g. 5, 4 -> 32x16).
// NOTE(review): unlike the SSSE3 table for the same test, this one has no
// 128x128, 128x64 or 64x128 entries — confirm whether NEON kernels for those
// sizes exist and should be covered.
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against DistWtdSubpelAvgVarianceParams.
3506 const DistWtdSubpelAvgVarianceParams kArrayDistWtdSubpelAvgVariance_neon[] = {
3507 DistWtdSubpelAvgVarianceParams(
3508 6, 6, &aom_dist_wtd_sub_pixel_avg_variance64x64_neon, 0),
3509 DistWtdSubpelAvgVarianceParams(
3510 6, 5, &aom_dist_wtd_sub_pixel_avg_variance64x32_neon, 0),
3511 DistWtdSubpelAvgVarianceParams(
3512 5, 6, &aom_dist_wtd_sub_pixel_avg_variance32x64_neon, 0),
3513 DistWtdSubpelAvgVarianceParams(
3514 5, 5, &aom_dist_wtd_sub_pixel_avg_variance32x32_neon, 0),
3515 DistWtdSubpelAvgVarianceParams(
3516 5, 4, &aom_dist_wtd_sub_pixel_avg_variance32x16_neon, 0),
3517 DistWtdSubpelAvgVarianceParams(
3518 4, 5, &aom_dist_wtd_sub_pixel_avg_variance16x32_neon, 0),
3519 DistWtdSubpelAvgVarianceParams(
3520 4, 4, &aom_dist_wtd_sub_pixel_avg_variance16x16_neon, 0),
3521 DistWtdSubpelAvgVarianceParams(
3522 4, 3, &aom_dist_wtd_sub_pixel_avg_variance16x8_neon, 0),
3523 DistWtdSubpelAvgVarianceParams(
3524 3, 4, &aom_dist_wtd_sub_pixel_avg_variance8x16_neon, 0),
3525 DistWtdSubpelAvgVarianceParams(
3526 3, 3, &aom_dist_wtd_sub_pixel_avg_variance8x8_neon, 0),
3527 DistWtdSubpelAvgVarianceParams(
3528 3, 2, &aom_dist_wtd_sub_pixel_avg_variance8x4_neon, 0),
3529 DistWtdSubpelAvgVarianceParams(
3530 2, 3, &aom_dist_wtd_sub_pixel_avg_variance4x8_neon, 0),
3531 DistWtdSubpelAvgVarianceParams(
3532 2, 2, &aom_dist_wtd_sub_pixel_avg_variance4x4_neon, 0),
// The 4:1 / 1:4 block sizes are compiled in only when CONFIG_REALTIME_ONLY
// is 0.
3533 #if !CONFIG_REALTIME_ONLY
3534 DistWtdSubpelAvgVarianceParams(
3535 6, 4, &aom_dist_wtd_sub_pixel_avg_variance64x16_neon, 0),
3536 DistWtdSubpelAvgVarianceParams(
3537 4, 6, &aom_dist_wtd_sub_pixel_avg_variance16x64_neon, 0),
3538 DistWtdSubpelAvgVarianceParams(
3539 5, 3, &aom_dist_wtd_sub_pixel_avg_variance32x8_neon, 0),
3540 DistWtdSubpelAvgVarianceParams(
3541 3, 5, &aom_dist_wtd_sub_pixel_avg_variance8x32_neon, 0),
3542 DistWtdSubpelAvgVarianceParams(
3543 4, 2, &aom_dist_wtd_sub_pixel_avg_variance16x4_neon, 0),
3544 DistWtdSubpelAvgVarianceParams(
3545 2, 4, &aom_dist_wtd_sub_pixel_avg_variance4x16_neon, 0),
3546 #endif // !CONFIG_REALTIME_ONLY
3547 };
// One AvxDistWtdSubpelAvgVarianceTest instance per table entry.
3548 INSTANTIATE_TEST_SUITE_P(
3549 NEON, AvxDistWtdSubpelAvgVarianceTest,
3550 ::testing::ValuesIn(kArrayDistWtdSubpelAvgVariance_neon));
3551
3552 #if !CONFIG_REALTIME_ONLY
// Parameter table for the NEON OBMC sub-pixel variance kernels. The first two
// arguments of each entry equal log2 of the block width and height in the
// function name (e.g. 7, 6 -> 128x64). The enclosing
// #if !CONFIG_REALTIME_ONLY removes the table and its instantiation from
// realtime-only builds.
// NOTE(review): the trailing 0 is presumably the bit-depth slot for the
// non-high-bitdepth path — confirm against ObmcSubpelVarianceParams.
3553 const ObmcSubpelVarianceParams kArrayObmcSubpelVariance_neon[] = {
3554 ObmcSubpelVarianceParams(7, 7, &aom_obmc_sub_pixel_variance128x128_neon, 0),
3555 ObmcSubpelVarianceParams(7, 6, &aom_obmc_sub_pixel_variance128x64_neon, 0),
3556 ObmcSubpelVarianceParams(6, 7, &aom_obmc_sub_pixel_variance64x128_neon, 0),
3557 ObmcSubpelVarianceParams(6, 6, &aom_obmc_sub_pixel_variance64x64_neon, 0),
3558 ObmcSubpelVarianceParams(6, 5, &aom_obmc_sub_pixel_variance64x32_neon, 0),
3559 ObmcSubpelVarianceParams(5, 6, &aom_obmc_sub_pixel_variance32x64_neon, 0),
3560 ObmcSubpelVarianceParams(5, 5, &aom_obmc_sub_pixel_variance32x32_neon, 0),
3561 ObmcSubpelVarianceParams(5, 4, &aom_obmc_sub_pixel_variance32x16_neon, 0),
3562 ObmcSubpelVarianceParams(4, 5, &aom_obmc_sub_pixel_variance16x32_neon, 0),
3563 ObmcSubpelVarianceParams(4, 4, &aom_obmc_sub_pixel_variance16x16_neon, 0),
3564 ObmcSubpelVarianceParams(4, 3, &aom_obmc_sub_pixel_variance16x8_neon, 0),
3565 ObmcSubpelVarianceParams(3, 4, &aom_obmc_sub_pixel_variance8x16_neon, 0),
3566 ObmcSubpelVarianceParams(3, 3, &aom_obmc_sub_pixel_variance8x8_neon, 0),
3567 ObmcSubpelVarianceParams(3, 2, &aom_obmc_sub_pixel_variance8x4_neon, 0),
3568 ObmcSubpelVarianceParams(2, 3, &aom_obmc_sub_pixel_variance4x8_neon, 0),
3569 ObmcSubpelVarianceParams(2, 2, &aom_obmc_sub_pixel_variance4x4_neon, 0),
3570 ObmcSubpelVarianceParams(6, 4, &aom_obmc_sub_pixel_variance64x16_neon, 0),
3571 ObmcSubpelVarianceParams(4, 6, &aom_obmc_sub_pixel_variance16x64_neon, 0),
3572 ObmcSubpelVarianceParams(5, 3, &aom_obmc_sub_pixel_variance32x8_neon, 0),
3573 ObmcSubpelVarianceParams(3, 5, &aom_obmc_sub_pixel_variance8x32_neon, 0),
3574 ObmcSubpelVarianceParams(4, 2, &aom_obmc_sub_pixel_variance16x4_neon, 0),
3575 ObmcSubpelVarianceParams(2, 4, &aom_obmc_sub_pixel_variance4x16_neon, 0),
3576 };
// One AvxObmcSubpelVarianceTest instance per table entry.
3577 INSTANTIATE_TEST_SUITE_P(NEON, AvxObmcSubpelVarianceTest,
3578 ::testing::ValuesIn(kArrayObmcSubpelVariance_neon));
3579 #endif
3580
// Parameter table for GetSseSum8x8QuadTest on NEON. Every entry passes the
// same kernel, aom_get_var_sse_sum_8x8_quad_neon; only the first two
// arguments differ.
// NOTE(review): those arguments are presumably log2 width/height of the
// tested region, and the trailing 0 a bit-depth slot — confirm against
// GetSseSumParams.
3581 const GetSseSumParams kArrayGetSseSum8x8Quad_neon[] = {
3582 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3583 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3584 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon, 0),
3585 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon, 0)
3586 };
// One GetSseSum8x8QuadTest instance per table entry.
3587 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum8x8QuadTest,
3588 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon));
3589
// Parameter table for GetSseSum16x16DualTest on NEON. Every entry passes the
// same kernel, aom_get_var_sse_sum_16x16_dual_neon; only the first two
// arguments differ.
// NOTE(review): those arguments are presumably log2 width/height of the
// tested region, and the trailing 0 a bit-depth slot — confirm against
// GetSseSumParamsDual.
3590 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon[] = {
3591 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3592 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3593 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon, 0),
3594 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon, 0)
3595 };
// One GetSseSum16x16DualTest instance per table entry.
3596 INSTANTIATE_TEST_SUITE_P(NEON, GetSseSum16x16DualTest,
3597 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon));
3598
3599 #if CONFIG_AV1_HIGHBITDEPTH
// NEON high-bitdepth variance kernels fed to AvxHBDVarianceTest.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// (e.g. (7, 6) goes with 128x64) and the trailing argument equals the bit
// depth in the kernel name. Entries are grouped by bit depth in the order
// 12, 10, 8, each group running from 128x128 down to 4x4.
3600 const VarianceParams kArrayHBDVariance_neon[] = {
// 12-bit kernels.
3601 VarianceParams(7, 7, &aom_highbd_12_variance128x128_neon, 12),
3602 VarianceParams(7, 6, &aom_highbd_12_variance128x64_neon, 12),
3603 VarianceParams(6, 7, &aom_highbd_12_variance64x128_neon, 12),
3604 VarianceParams(6, 6, &aom_highbd_12_variance64x64_neon, 12),
3605 VarianceParams(6, 5, &aom_highbd_12_variance64x32_neon, 12),
3606 VarianceParams(5, 6, &aom_highbd_12_variance32x64_neon, 12),
3607 VarianceParams(5, 5, &aom_highbd_12_variance32x32_neon, 12),
3608 VarianceParams(5, 4, &aom_highbd_12_variance32x16_neon, 12),
3609 VarianceParams(4, 5, &aom_highbd_12_variance16x32_neon, 12),
3610 VarianceParams(4, 4, &aom_highbd_12_variance16x16_neon, 12),
3611 VarianceParams(4, 3, &aom_highbd_12_variance16x8_neon, 12),
3612 VarianceParams(3, 4, &aom_highbd_12_variance8x16_neon, 12),
3613 VarianceParams(3, 3, &aom_highbd_12_variance8x8_neon, 12),
3614 VarianceParams(3, 2, &aom_highbd_12_variance8x4_neon, 12),
3615 VarianceParams(2, 3, &aom_highbd_12_variance4x8_neon, 12),
3616 VarianceParams(2, 2, &aom_highbd_12_variance4x4_neon, 12),
// 10-bit kernels.
3617 VarianceParams(7, 7, &aom_highbd_10_variance128x128_neon, 10),
3618 VarianceParams(7, 6, &aom_highbd_10_variance128x64_neon, 10),
3619 VarianceParams(6, 7, &aom_highbd_10_variance64x128_neon, 10),
3620 VarianceParams(6, 6, &aom_highbd_10_variance64x64_neon, 10),
3621 VarianceParams(6, 5, &aom_highbd_10_variance64x32_neon, 10),
3622 VarianceParams(5, 6, &aom_highbd_10_variance32x64_neon, 10),
3623 VarianceParams(5, 5, &aom_highbd_10_variance32x32_neon, 10),
3624 VarianceParams(5, 4, &aom_highbd_10_variance32x16_neon, 10),
3625 VarianceParams(4, 5, &aom_highbd_10_variance16x32_neon, 10),
3626 VarianceParams(4, 4, &aom_highbd_10_variance16x16_neon, 10),
3627 VarianceParams(4, 3, &aom_highbd_10_variance16x8_neon, 10),
3628 VarianceParams(3, 4, &aom_highbd_10_variance8x16_neon, 10),
3629 VarianceParams(3, 3, &aom_highbd_10_variance8x8_neon, 10),
3630 VarianceParams(3, 2, &aom_highbd_10_variance8x4_neon, 10),
3631 VarianceParams(2, 3, &aom_highbd_10_variance4x8_neon, 10),
3632 VarianceParams(2, 2, &aom_highbd_10_variance4x4_neon, 10),
// 8-bit kernels.
3633 VarianceParams(7, 7, &aom_highbd_8_variance128x128_neon, 8),
3634 VarianceParams(7, 6, &aom_highbd_8_variance128x64_neon, 8),
3635 VarianceParams(6, 7, &aom_highbd_8_variance64x128_neon, 8),
3636 VarianceParams(6, 6, &aom_highbd_8_variance64x64_neon, 8),
3637 VarianceParams(6, 5, &aom_highbd_8_variance64x32_neon, 8),
3638 VarianceParams(5, 6, &aom_highbd_8_variance32x64_neon, 8),
3639 VarianceParams(5, 5, &aom_highbd_8_variance32x32_neon, 8),
3640 VarianceParams(5, 4, &aom_highbd_8_variance32x16_neon, 8),
3641 VarianceParams(4, 5, &aom_highbd_8_variance16x32_neon, 8),
3642 VarianceParams(4, 4, &aom_highbd_8_variance16x16_neon, 8),
3643 VarianceParams(4, 3, &aom_highbd_8_variance16x8_neon, 8),
3644 VarianceParams(3, 4, &aom_highbd_8_variance8x16_neon, 8),
3645 VarianceParams(3, 3, &aom_highbd_8_variance8x8_neon, 8),
3646 VarianceParams(3, 2, &aom_highbd_8_variance8x4_neon, 8),
3647 VarianceParams(2, 3, &aom_highbd_8_variance4x8_neon, 8),
3648 VarianceParams(2, 2, &aom_highbd_8_variance4x4_neon, 8),
// The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 kernels are listed only when
// CONFIG_REALTIME_ONLY is 0; same 12, 10, 8 bit-depth order as above.
3649 #if !CONFIG_REALTIME_ONLY
3650 VarianceParams(6, 4, &aom_highbd_12_variance64x16_neon, 12),
3651 VarianceParams(4, 6, &aom_highbd_12_variance16x64_neon, 12),
3652 VarianceParams(5, 3, &aom_highbd_12_variance32x8_neon, 12),
3653 VarianceParams(3, 5, &aom_highbd_12_variance8x32_neon, 12),
3654 VarianceParams(4, 2, &aom_highbd_12_variance16x4_neon, 12),
3655 VarianceParams(2, 4, &aom_highbd_12_variance4x16_neon, 12),
3656 VarianceParams(6, 4, &aom_highbd_10_variance64x16_neon, 10),
3657 VarianceParams(4, 6, &aom_highbd_10_variance16x64_neon, 10),
3658 VarianceParams(5, 3, &aom_highbd_10_variance32x8_neon, 10),
3659 VarianceParams(3, 5, &aom_highbd_10_variance8x32_neon, 10),
3660 VarianceParams(4, 2, &aom_highbd_10_variance16x4_neon, 10),
3661 VarianceParams(2, 4, &aom_highbd_10_variance4x16_neon, 10),
3662 VarianceParams(6, 4, &aom_highbd_8_variance64x16_neon, 8),
3663 VarianceParams(4, 6, &aom_highbd_8_variance16x64_neon, 8),
3664 VarianceParams(5, 3, &aom_highbd_8_variance32x8_neon, 8),
3665 VarianceParams(3, 5, &aom_highbd_8_variance8x32_neon, 8),
3666 VarianceParams(4, 2, &aom_highbd_8_variance16x4_neon, 8),
3667 VarianceParams(2, 4, &aom_highbd_8_variance4x16_neon, 8),
3668 #endif
3669 };
3670
// Register the table with AvxHBDVarianceTest under the "NEON" prefix.
3671 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDVarianceTest,
3672 ::testing::ValuesIn(kArrayHBDVariance_neon));
3673
// NEON high-bitdepth sub-pixel variance kernels fed to
// AvxHBDSubpelVarianceTest.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// and the trailing argument equals the bit depth in the kernel name. The
// unconditional entries are grouped by bit depth in the order 12, 10, 8 and
// each group runs from 64x64 down to 4x4.
// NOTE(review): no 128x128 / 128x64 / 64x128 entries are listed here, unlike
// kArrayHBDVariance_neon and kArrayHBDSubpelAvgVariance_neon in this file --
// confirm whether that omission is intentional.
3674 const SubpelVarianceParams kArrayHBDSubpelVariance_neon[] = {
// 12-bit kernels.
3675 SubpelVarianceParams(6, 6, &aom_highbd_12_sub_pixel_variance64x64_neon, 12),
3676 SubpelVarianceParams(6, 5, &aom_highbd_12_sub_pixel_variance64x32_neon, 12),
3677 SubpelVarianceParams(5, 6, &aom_highbd_12_sub_pixel_variance32x64_neon, 12),
3678 SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_neon, 12),
3679 SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_neon, 12),
3680 SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_neon, 12),
3681 SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_neon, 12),
3682 SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_neon, 12),
3683 SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_neon, 12),
3684 SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_neon, 12),
3685 SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_neon, 12),
3686 SubpelVarianceParams(2, 3, &aom_highbd_12_sub_pixel_variance4x8_neon, 12),
3687 SubpelVarianceParams(2, 2, &aom_highbd_12_sub_pixel_variance4x4_neon, 12),
// 10-bit kernels.
3688 SubpelVarianceParams(6, 6, &aom_highbd_10_sub_pixel_variance64x64_neon, 10),
3689 SubpelVarianceParams(6, 5, &aom_highbd_10_sub_pixel_variance64x32_neon, 10),
3690 SubpelVarianceParams(5, 6, &aom_highbd_10_sub_pixel_variance32x64_neon, 10),
3691 SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_neon, 10),
3692 SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_neon, 10),
3693 SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_neon, 10),
3694 SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_neon, 10),
3695 SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_neon, 10),
3696 SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_neon, 10),
3697 SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_neon, 10),
3698 SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_neon, 10),
3699 SubpelVarianceParams(2, 3, &aom_highbd_10_sub_pixel_variance4x8_neon, 10),
3700 SubpelVarianceParams(2, 2, &aom_highbd_10_sub_pixel_variance4x4_neon, 10),
// 8-bit kernels.
3701 SubpelVarianceParams(6, 6, &aom_highbd_8_sub_pixel_variance64x64_neon, 8),
3702 SubpelVarianceParams(6, 5, &aom_highbd_8_sub_pixel_variance64x32_neon, 8),
3703 SubpelVarianceParams(5, 6, &aom_highbd_8_sub_pixel_variance32x64_neon, 8),
3704 SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_neon, 8),
3705 SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_neon, 8),
3706 SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_neon, 8),
3707 SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_neon, 8),
3708 SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_neon, 8),
3709 SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_neon, 8),
3710 SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_neon, 8),
3711 SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_neon, 8),
3712 SubpelVarianceParams(2, 3, &aom_highbd_8_sub_pixel_variance4x8_neon, 8),
3713 SubpelVarianceParams(2, 2, &aom_highbd_8_sub_pixel_variance4x4_neon, 8),
// The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 kernels are listed only when
// CONFIG_REALTIME_ONLY is 0. Note the bit-depth order in this section is
// 8, 10, 12 -- the reverse of the unconditional entries above.
3714 #if !CONFIG_REALTIME_ONLY
3715 SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_neon, 8),
3716 SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_neon, 8),
3717 SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_neon, 8),
3718 SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_neon, 8),
3719 SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_neon, 8),
3720 SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_neon, 8),
3721 SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_neon, 10),
3722 SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_neon, 10),
3723 SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_neon, 10),
3724 SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_neon, 10),
3725 SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_neon, 10),
3726 SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_neon, 10),
3727 SubpelVarianceParams(6, 4, &aom_highbd_12_sub_pixel_variance64x16_neon, 12),
3728 SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_neon, 12),
3729 SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_neon, 12),
3730 SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_neon, 12),
3731 SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_neon, 12),
3732 SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_neon, 12),
3733 #endif // !CONFIG_REALTIME_ONLY
3734 };
3735
// Register the table with AvxHBDSubpelVarianceTest under the "NEON" prefix.
3736 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelVarianceTest,
3737 ::testing::ValuesIn(kArrayHBDSubpelVariance_neon));
3738
// NEON high-bitdepth sub-pixel averaging variance kernels fed to
// AvxHBDSubpelAvgVarianceTest.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// and the trailing argument equals the bit depth in the kernel name. Entries
// are grouped by bit depth in the order 8, 10, 12, each group running from
// 128x128 down to 4x4.
3739 const SubpelAvgVarianceParams kArrayHBDSubpelAvgVariance_neon[] = {
// 8-bit kernels.
3740 SubpelAvgVarianceParams(7, 7,
3741 &aom_highbd_8_sub_pixel_avg_variance128x128_neon, 8),
3742 SubpelAvgVarianceParams(7, 6, &aom_highbd_8_sub_pixel_avg_variance128x64_neon,
3743 8),
3744 SubpelAvgVarianceParams(6, 7, &aom_highbd_8_sub_pixel_avg_variance64x128_neon,
3745 8),
3746 SubpelAvgVarianceParams(6, 6, &aom_highbd_8_sub_pixel_avg_variance64x64_neon,
3747 8),
3748 SubpelAvgVarianceParams(6, 5, &aom_highbd_8_sub_pixel_avg_variance64x32_neon,
3749 8),
3750 SubpelAvgVarianceParams(5, 6, &aom_highbd_8_sub_pixel_avg_variance32x64_neon,
3751 8),
3752 SubpelAvgVarianceParams(5, 5, &aom_highbd_8_sub_pixel_avg_variance32x32_neon,
3753 8),
3754 SubpelAvgVarianceParams(5, 4, &aom_highbd_8_sub_pixel_avg_variance32x16_neon,
3755 8),
3756 SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_neon,
3757 8),
3758 SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_neon,
3759 8),
3760 SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_neon,
3761 8),
3762 SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_neon,
3763 8),
3764 SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_neon,
3765 8),
3766 SubpelAvgVarianceParams(3, 2, &aom_highbd_8_sub_pixel_avg_variance8x4_neon,
3767 8),
3768 SubpelAvgVarianceParams(2, 3, &aom_highbd_8_sub_pixel_avg_variance4x8_neon,
3769 8),
3770 SubpelAvgVarianceParams(2, 2, &aom_highbd_8_sub_pixel_avg_variance4x4_neon,
3771 8),
// 10-bit kernels.
3772 SubpelAvgVarianceParams(
3773 7, 7, &aom_highbd_10_sub_pixel_avg_variance128x128_neon, 10),
3774 SubpelAvgVarianceParams(7, 6,
3775 &aom_highbd_10_sub_pixel_avg_variance128x64_neon, 10),
3776 SubpelAvgVarianceParams(6, 7,
3777 &aom_highbd_10_sub_pixel_avg_variance64x128_neon, 10),
3778 SubpelAvgVarianceParams(6, 6, &aom_highbd_10_sub_pixel_avg_variance64x64_neon,
3779 10),
3780 SubpelAvgVarianceParams(6, 5, &aom_highbd_10_sub_pixel_avg_variance64x32_neon,
3781 10),
3782 SubpelAvgVarianceParams(5, 6, &aom_highbd_10_sub_pixel_avg_variance32x64_neon,
3783 10),
3784 SubpelAvgVarianceParams(5, 5, &aom_highbd_10_sub_pixel_avg_variance32x32_neon,
3785 10),
3786 SubpelAvgVarianceParams(5, 4, &aom_highbd_10_sub_pixel_avg_variance32x16_neon,
3787 10),
3788 SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_neon,
3789 10),
3790 SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_neon,
3791 10),
3792 SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_neon,
3793 10),
3794 SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_neon,
3795 10),
3796 SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_neon,
3797 10),
3798 SubpelAvgVarianceParams(3, 2, &aom_highbd_10_sub_pixel_avg_variance8x4_neon,
3799 10),
3800 SubpelAvgVarianceParams(2, 3, &aom_highbd_10_sub_pixel_avg_variance4x8_neon,
3801 10),
3802 SubpelAvgVarianceParams(2, 2, &aom_highbd_10_sub_pixel_avg_variance4x4_neon,
3803 10),
// 12-bit kernels.
3804 SubpelAvgVarianceParams(
3805 7, 7, &aom_highbd_12_sub_pixel_avg_variance128x128_neon, 12),
3806 SubpelAvgVarianceParams(7, 6,
3807 &aom_highbd_12_sub_pixel_avg_variance128x64_neon, 12),
3808 SubpelAvgVarianceParams(6, 7,
3809 &aom_highbd_12_sub_pixel_avg_variance64x128_neon, 12),
3810 SubpelAvgVarianceParams(6, 6, &aom_highbd_12_sub_pixel_avg_variance64x64_neon,
3811 12),
3812 SubpelAvgVarianceParams(6, 5, &aom_highbd_12_sub_pixel_avg_variance64x32_neon,
3813 12),
3814 SubpelAvgVarianceParams(5, 6, &aom_highbd_12_sub_pixel_avg_variance32x64_neon,
3815 12),
3816 SubpelAvgVarianceParams(5, 5, &aom_highbd_12_sub_pixel_avg_variance32x32_neon,
3817 12),
3818 SubpelAvgVarianceParams(5, 4, &aom_highbd_12_sub_pixel_avg_variance32x16_neon,
3819 12),
3820 SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_neon,
3821 12),
3822 SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_neon,
3823 12),
3824 SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_neon,
3825 12),
3826 SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_neon,
3827 12),
3828 SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_neon,
3829 12),
3830 SubpelAvgVarianceParams(3, 2, &aom_highbd_12_sub_pixel_avg_variance8x4_neon,
3831 12),
3832 SubpelAvgVarianceParams(2, 3, &aom_highbd_12_sub_pixel_avg_variance4x8_neon,
3833 12),
3834 SubpelAvgVarianceParams(2, 2, &aom_highbd_12_sub_pixel_avg_variance4x4_neon,
3835 12),
3836
// The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 kernels are listed only when
// CONFIG_REALTIME_ONLY is 0; same 8, 10, 12 bit-depth order as above.
3837 #if !CONFIG_REALTIME_ONLY
3838 SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_neon,
3839 8),
3840 SubpelAvgVarianceParams(4, 6, &aom_highbd_8_sub_pixel_avg_variance16x64_neon,
3841 8),
3842 SubpelAvgVarianceParams(5, 3, &aom_highbd_8_sub_pixel_avg_variance32x8_neon,
3843 8),
3844 SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_neon,
3845 8),
3846 SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_neon,
3847 8),
3848 SubpelAvgVarianceParams(2, 4, &aom_highbd_8_sub_pixel_avg_variance4x16_neon,
3849 8),
3850 SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_neon,
3851 10),
3852 SubpelAvgVarianceParams(4, 6, &aom_highbd_10_sub_pixel_avg_variance16x64_neon,
3853 10),
3854 SubpelAvgVarianceParams(5, 3, &aom_highbd_10_sub_pixel_avg_variance32x8_neon,
3855 10),
3856 SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_neon,
3857 10),
3858 SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_neon,
3859 10),
3860 SubpelAvgVarianceParams(2, 4, &aom_highbd_10_sub_pixel_avg_variance4x16_neon,
3861 10),
3862 SubpelAvgVarianceParams(6, 4, &aom_highbd_12_sub_pixel_avg_variance64x16_neon,
3863 12),
3864 SubpelAvgVarianceParams(4, 6, &aom_highbd_12_sub_pixel_avg_variance16x64_neon,
3865 12),
3866 SubpelAvgVarianceParams(5, 3, &aom_highbd_12_sub_pixel_avg_variance32x8_neon,
3867 12),
3868 SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_neon,
3869 12),
3870 SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_neon,
3871 12),
3872 SubpelAvgVarianceParams(2, 4, &aom_highbd_12_sub_pixel_avg_variance4x16_neon,
3873 12),
3874 #endif
3875 };
3876
// Register the table with AvxHBDSubpelAvgVarianceTest under the "NEON" prefix.
3877 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDSubpelAvgVarianceTest,
3878 ::testing::ValuesIn(kArrayHBDSubpelAvgVariance_neon));
3879
// NEON high-bitdepth distance-weighted sub-pixel averaging variance kernels
// fed to AvxHBDDistWtdSubpelAvgVarianceTest.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// and the trailing argument equals the bit depth in the kernel name. Entries
// are grouped by bit depth in the order 8, 10, 12, each group running from
// 128x128 down to 4x4.
3880 const DistWtdSubpelAvgVarianceParams
3881 kArrayHBDDistWtdSubpelAvgVariance_neon[] = {
// 8-bit kernels.
3882 DistWtdSubpelAvgVarianceParams(
3883 7, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x128_neon, 8),
3884 DistWtdSubpelAvgVarianceParams(
3885 7, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance128x64_neon, 8),
3886 DistWtdSubpelAvgVarianceParams(
3887 6, 7, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x128_neon, 8),
3888 DistWtdSubpelAvgVarianceParams(
3889 6, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x64_neon, 8),
3890 DistWtdSubpelAvgVarianceParams(
3891 6, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x32_neon, 8),
3892 DistWtdSubpelAvgVarianceParams(
3893 5, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x64_neon, 8),
3894 DistWtdSubpelAvgVarianceParams(
3895 5, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x32_neon, 8),
3896 DistWtdSubpelAvgVarianceParams(
3897 5, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x16_neon, 8),
3898 DistWtdSubpelAvgVarianceParams(
3899 4, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x32_neon, 8),
3900 DistWtdSubpelAvgVarianceParams(
3901 4, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x16_neon, 8),
3902 DistWtdSubpelAvgVarianceParams(
3903 4, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x8_neon, 8),
3904 DistWtdSubpelAvgVarianceParams(
3905 3, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x16_neon, 8),
3906 DistWtdSubpelAvgVarianceParams(
3907 3, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x8_neon, 8),
3908 DistWtdSubpelAvgVarianceParams(
3909 3, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x4_neon, 8),
3910 DistWtdSubpelAvgVarianceParams(
3911 2, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x8_neon, 8),
3912 DistWtdSubpelAvgVarianceParams(
3913 2, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x4_neon, 8),
// 10-bit kernels.
3914 DistWtdSubpelAvgVarianceParams(
3915 7, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x128_neon, 10),
3916 DistWtdSubpelAvgVarianceParams(
3917 7, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance128x64_neon, 10),
3918 DistWtdSubpelAvgVarianceParams(
3919 6, 7, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x128_neon, 10),
3920 DistWtdSubpelAvgVarianceParams(
3921 6, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x64_neon, 10),
3922 DistWtdSubpelAvgVarianceParams(
3923 6, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x32_neon, 10),
3924 DistWtdSubpelAvgVarianceParams(
3925 5, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x64_neon, 10),
3926 DistWtdSubpelAvgVarianceParams(
3927 5, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x32_neon, 10),
3928 DistWtdSubpelAvgVarianceParams(
3929 5, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x16_neon, 10),
3930 DistWtdSubpelAvgVarianceParams(
3931 4, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x32_neon, 10),
3932 DistWtdSubpelAvgVarianceParams(
3933 4, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x16_neon, 10),
3934 DistWtdSubpelAvgVarianceParams(
3935 4, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x8_neon, 10),
3936 DistWtdSubpelAvgVarianceParams(
3937 3, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x16_neon, 10),
3938 DistWtdSubpelAvgVarianceParams(
3939 3, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x8_neon, 10),
3940 DistWtdSubpelAvgVarianceParams(
3941 3, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x4_neon, 10),
3942 DistWtdSubpelAvgVarianceParams(
3943 2, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x8_neon, 10),
3944 DistWtdSubpelAvgVarianceParams(
3945 2, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x4_neon, 10),
// 12-bit kernels.
3946 DistWtdSubpelAvgVarianceParams(
3947 7, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x128_neon, 12),
3948 DistWtdSubpelAvgVarianceParams(
3949 7, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance128x64_neon, 12),
3950 DistWtdSubpelAvgVarianceParams(
3951 6, 7, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x128_neon, 12),
3952 DistWtdSubpelAvgVarianceParams(
3953 6, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x64_neon, 12),
3954 DistWtdSubpelAvgVarianceParams(
3955 6, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x32_neon, 12),
3956 DistWtdSubpelAvgVarianceParams(
3957 5, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x64_neon, 12),
3958 DistWtdSubpelAvgVarianceParams(
3959 5, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x32_neon, 12),
3960 DistWtdSubpelAvgVarianceParams(
3961 5, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x16_neon, 12),
3962 DistWtdSubpelAvgVarianceParams(
3963 4, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x32_neon, 12),
3964 DistWtdSubpelAvgVarianceParams(
3965 4, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x16_neon, 12),
3966 DistWtdSubpelAvgVarianceParams(
3967 4, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x8_neon, 12),
3968 DistWtdSubpelAvgVarianceParams(
3969 3, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x16_neon, 12),
3970 DistWtdSubpelAvgVarianceParams(
3971 3, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x8_neon, 12),
3972 DistWtdSubpelAvgVarianceParams(
3973 3, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x4_neon, 12),
3974 DistWtdSubpelAvgVarianceParams(
3975 2, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x8_neon, 12),
3976 DistWtdSubpelAvgVarianceParams(
3977 2, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x4_neon, 12),
3978
// The 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16 kernels are listed only when
// CONFIG_REALTIME_ONLY is 0; same 8, 10, 12 bit-depth order as above.
3979 #if !CONFIG_REALTIME_ONLY
3980 DistWtdSubpelAvgVarianceParams(
3981 6, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance64x16_neon, 8),
3982 DistWtdSubpelAvgVarianceParams(
3983 4, 6, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x64_neon, 8),
3984 DistWtdSubpelAvgVarianceParams(
3985 5, 3, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance32x8_neon, 8),
3986 DistWtdSubpelAvgVarianceParams(
3987 3, 5, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance8x32_neon, 8),
3988 DistWtdSubpelAvgVarianceParams(
3989 4, 2, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance16x4_neon, 8),
3990 DistWtdSubpelAvgVarianceParams(
3991 2, 4, &aom_highbd_8_dist_wtd_sub_pixel_avg_variance4x16_neon, 8),
3992 DistWtdSubpelAvgVarianceParams(
3993 6, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance64x16_neon, 10),
3994 DistWtdSubpelAvgVarianceParams(
3995 4, 6, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x64_neon, 10),
3996 DistWtdSubpelAvgVarianceParams(
3997 5, 3, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance32x8_neon, 10),
3998 DistWtdSubpelAvgVarianceParams(
3999 3, 5, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance8x32_neon, 10),
4000 DistWtdSubpelAvgVarianceParams(
4001 4, 2, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance16x4_neon, 10),
4002 DistWtdSubpelAvgVarianceParams(
4003 2, 4, &aom_highbd_10_dist_wtd_sub_pixel_avg_variance4x16_neon, 10),
4004 DistWtdSubpelAvgVarianceParams(
4005 6, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance64x16_neon, 12),
4006 DistWtdSubpelAvgVarianceParams(
4007 4, 6, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x64_neon, 12),
4008 DistWtdSubpelAvgVarianceParams(
4009 5, 3, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance32x8_neon, 12),
4010 DistWtdSubpelAvgVarianceParams(
4011 3, 5, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance8x32_neon, 12),
4012 DistWtdSubpelAvgVarianceParams(
4013 4, 2, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance16x4_neon, 12),
4014 DistWtdSubpelAvgVarianceParams(
4015 2, 4, &aom_highbd_12_dist_wtd_sub_pixel_avg_variance4x16_neon, 12),
4016 #endif // !CONFIG_REALTIME_ONLY
4017 };
// Register the table with AvxHBDDistWtdSubpelAvgVarianceTest under the "NEON"
// prefix.
4018 INSTANTIATE_TEST_SUITE_P(
4019 NEON, AvxHBDDistWtdSubpelAvgVarianceTest,
4020 ::testing::ValuesIn(kArrayHBDDistWtdSubpelAvgVariance_neon));
4021
4022 #if !CONFIG_REALTIME_ONLY
// NEON high-bitdepth OBMC sub-pixel variance kernels fed to
// AvxHBDObmcSubpelVarianceTest. The table and its instantiation sit inside
// the surrounding #if !CONFIG_REALTIME_ONLY region.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// and the trailing argument equals the bit depth in the kernel name. Entries
// are grouped by bit depth in the order 12, 10, 8; each group lists 128x128
// down to 4x4, then 64x16, 16x64, 32x8, 8x32, 16x4 and 4x16.
4023 const ObmcSubpelVarianceParams kArrayHBDObmcSubpelVariance_neon[] = {
// 12-bit kernels.
4024 ObmcSubpelVarianceParams(
4025 7, 7, &aom_highbd_12_obmc_sub_pixel_variance128x128_neon, 12),
4026 ObmcSubpelVarianceParams(
4027 7, 6, &aom_highbd_12_obmc_sub_pixel_variance128x64_neon, 12),
4028 ObmcSubpelVarianceParams(
4029 6, 7, &aom_highbd_12_obmc_sub_pixel_variance64x128_neon, 12),
4030 ObmcSubpelVarianceParams(
4031 6, 6, &aom_highbd_12_obmc_sub_pixel_variance64x64_neon, 12),
4032 ObmcSubpelVarianceParams(
4033 6, 5, &aom_highbd_12_obmc_sub_pixel_variance64x32_neon, 12),
4034 ObmcSubpelVarianceParams(
4035 5, 6, &aom_highbd_12_obmc_sub_pixel_variance32x64_neon, 12),
4036 ObmcSubpelVarianceParams(
4037 5, 5, &aom_highbd_12_obmc_sub_pixel_variance32x32_neon, 12),
4038 ObmcSubpelVarianceParams(
4039 5, 4, &aom_highbd_12_obmc_sub_pixel_variance32x16_neon, 12),
4040 ObmcSubpelVarianceParams(
4041 4, 5, &aom_highbd_12_obmc_sub_pixel_variance16x32_neon, 12),
4042 ObmcSubpelVarianceParams(
4043 4, 4, &aom_highbd_12_obmc_sub_pixel_variance16x16_neon, 12),
4044 ObmcSubpelVarianceParams(4, 3,
4045 &aom_highbd_12_obmc_sub_pixel_variance16x8_neon, 12),
4046 ObmcSubpelVarianceParams(3, 4,
4047 &aom_highbd_12_obmc_sub_pixel_variance8x16_neon, 12),
4048 ObmcSubpelVarianceParams(3, 3, &aom_highbd_12_obmc_sub_pixel_variance8x8_neon,
4049 12),
4050 ObmcSubpelVarianceParams(3, 2, &aom_highbd_12_obmc_sub_pixel_variance8x4_neon,
4051 12),
4052 ObmcSubpelVarianceParams(2, 3, &aom_highbd_12_obmc_sub_pixel_variance4x8_neon,
4053 12),
4054 ObmcSubpelVarianceParams(2, 2, &aom_highbd_12_obmc_sub_pixel_variance4x4_neon,
4055 12),
4056 ObmcSubpelVarianceParams(
4057 6, 4, &aom_highbd_12_obmc_sub_pixel_variance64x16_neon, 12),
4058 ObmcSubpelVarianceParams(
4059 4, 6, &aom_highbd_12_obmc_sub_pixel_variance16x64_neon, 12),
4060 ObmcSubpelVarianceParams(5, 3,
4061 &aom_highbd_12_obmc_sub_pixel_variance32x8_neon, 12),
4062 ObmcSubpelVarianceParams(3, 5,
4063 &aom_highbd_12_obmc_sub_pixel_variance8x32_neon, 12),
4064 ObmcSubpelVarianceParams(4, 2,
4065 &aom_highbd_12_obmc_sub_pixel_variance16x4_neon, 12),
4066 ObmcSubpelVarianceParams(2, 4,
4067 &aom_highbd_12_obmc_sub_pixel_variance4x16_neon, 12),
// 10-bit kernels.
4068 ObmcSubpelVarianceParams(
4069 7, 7, &aom_highbd_10_obmc_sub_pixel_variance128x128_neon, 10),
4070 ObmcSubpelVarianceParams(
4071 7, 6, &aom_highbd_10_obmc_sub_pixel_variance128x64_neon, 10),
4072 ObmcSubpelVarianceParams(
4073 6, 7, &aom_highbd_10_obmc_sub_pixel_variance64x128_neon, 10),
4074 ObmcSubpelVarianceParams(
4075 6, 6, &aom_highbd_10_obmc_sub_pixel_variance64x64_neon, 10),
4076 ObmcSubpelVarianceParams(
4077 6, 5, &aom_highbd_10_obmc_sub_pixel_variance64x32_neon, 10),
4078 ObmcSubpelVarianceParams(
4079 5, 6, &aom_highbd_10_obmc_sub_pixel_variance32x64_neon, 10),
4080 ObmcSubpelVarianceParams(
4081 5, 5, &aom_highbd_10_obmc_sub_pixel_variance32x32_neon, 10),
4082 ObmcSubpelVarianceParams(
4083 5, 4, &aom_highbd_10_obmc_sub_pixel_variance32x16_neon, 10),
4084 ObmcSubpelVarianceParams(
4085 4, 5, &aom_highbd_10_obmc_sub_pixel_variance16x32_neon, 10),
4086 ObmcSubpelVarianceParams(
4087 4, 4, &aom_highbd_10_obmc_sub_pixel_variance16x16_neon, 10),
4088 ObmcSubpelVarianceParams(4, 3,
4089 &aom_highbd_10_obmc_sub_pixel_variance16x8_neon, 10),
4090 ObmcSubpelVarianceParams(3, 4,
4091 &aom_highbd_10_obmc_sub_pixel_variance8x16_neon, 10),
4092 ObmcSubpelVarianceParams(3, 3, &aom_highbd_10_obmc_sub_pixel_variance8x8_neon,
4093 10),
4094 ObmcSubpelVarianceParams(3, 2, &aom_highbd_10_obmc_sub_pixel_variance8x4_neon,
4095 10),
4096 ObmcSubpelVarianceParams(2, 3, &aom_highbd_10_obmc_sub_pixel_variance4x8_neon,
4097 10),
4098 ObmcSubpelVarianceParams(2, 2, &aom_highbd_10_obmc_sub_pixel_variance4x4_neon,
4099 10),
4100 ObmcSubpelVarianceParams(
4101 6, 4, &aom_highbd_10_obmc_sub_pixel_variance64x16_neon, 10),
4102 ObmcSubpelVarianceParams(
4103 4, 6, &aom_highbd_10_obmc_sub_pixel_variance16x64_neon, 10),
4104 ObmcSubpelVarianceParams(5, 3,
4105 &aom_highbd_10_obmc_sub_pixel_variance32x8_neon, 10),
4106 ObmcSubpelVarianceParams(3, 5,
4107 &aom_highbd_10_obmc_sub_pixel_variance8x32_neon, 10),
4108 ObmcSubpelVarianceParams(4, 2,
4109 &aom_highbd_10_obmc_sub_pixel_variance16x4_neon, 10),
4110 ObmcSubpelVarianceParams(2, 4,
4111 &aom_highbd_10_obmc_sub_pixel_variance4x16_neon, 10),
// 8-bit kernels.
4112 ObmcSubpelVarianceParams(
4113 7, 7, &aom_highbd_8_obmc_sub_pixel_variance128x128_neon, 8),
4114 ObmcSubpelVarianceParams(7, 6,
4115 &aom_highbd_8_obmc_sub_pixel_variance128x64_neon, 8),
4116 ObmcSubpelVarianceParams(6, 7,
4117 &aom_highbd_8_obmc_sub_pixel_variance64x128_neon, 8),
4118 ObmcSubpelVarianceParams(6, 6,
4119 &aom_highbd_8_obmc_sub_pixel_variance64x64_neon, 8),
4120 ObmcSubpelVarianceParams(6, 5,
4121 &aom_highbd_8_obmc_sub_pixel_variance64x32_neon, 8),
4122 ObmcSubpelVarianceParams(5, 6,
4123 &aom_highbd_8_obmc_sub_pixel_variance32x64_neon, 8),
4124 ObmcSubpelVarianceParams(5, 5,
4125 &aom_highbd_8_obmc_sub_pixel_variance32x32_neon, 8),
4126 ObmcSubpelVarianceParams(5, 4,
4127 &aom_highbd_8_obmc_sub_pixel_variance32x16_neon, 8),
4128 ObmcSubpelVarianceParams(4, 5,
4129 &aom_highbd_8_obmc_sub_pixel_variance16x32_neon, 8),
4130 ObmcSubpelVarianceParams(4, 4,
4131 &aom_highbd_8_obmc_sub_pixel_variance16x16_neon, 8),
4132 ObmcSubpelVarianceParams(4, 3, &aom_highbd_8_obmc_sub_pixel_variance16x8_neon,
4133 8),
4134 ObmcSubpelVarianceParams(3, 4, &aom_highbd_8_obmc_sub_pixel_variance8x16_neon,
4135 8),
4136 ObmcSubpelVarianceParams(3, 3, &aom_highbd_8_obmc_sub_pixel_variance8x8_neon,
4137 8),
4138 ObmcSubpelVarianceParams(3, 2, &aom_highbd_8_obmc_sub_pixel_variance8x4_neon,
4139 8),
4140 ObmcSubpelVarianceParams(2, 3, &aom_highbd_8_obmc_sub_pixel_variance4x8_neon,
4141 8),
4142 ObmcSubpelVarianceParams(2, 2, &aom_highbd_8_obmc_sub_pixel_variance4x4_neon,
4143 8),
4144 ObmcSubpelVarianceParams(6, 4,
4145 &aom_highbd_8_obmc_sub_pixel_variance64x16_neon, 8),
4146 ObmcSubpelVarianceParams(4, 6,
4147 &aom_highbd_8_obmc_sub_pixel_variance16x64_neon, 8),
4148 ObmcSubpelVarianceParams(5, 3, &aom_highbd_8_obmc_sub_pixel_variance32x8_neon,
4149 8),
4150 ObmcSubpelVarianceParams(3, 5, &aom_highbd_8_obmc_sub_pixel_variance8x32_neon,
4151 8),
4152 ObmcSubpelVarianceParams(4, 2, &aom_highbd_8_obmc_sub_pixel_variance16x4_neon,
4153 8),
4154 ObmcSubpelVarianceParams(2, 4, &aom_highbd_8_obmc_sub_pixel_variance4x16_neon,
4155 8),
4156 };
4157
// Register the table with AvxHBDObmcSubpelVarianceTest under the "NEON"
// prefix.
4158 INSTANTIATE_TEST_SUITE_P(NEON, AvxHBDObmcSubpelVarianceTest,
4159 ::testing::ValuesIn(kArrayHBDObmcSubpelVariance_neon));
4160 #endif // !CONFIG_REALTIME_ONLY
4161
4162 #endif // CONFIG_AV1_HIGHBITDEPTH
4163
4164 #endif // HAVE_NEON
4165
4166 #if HAVE_NEON_DOTPROD
4167
// NEON dot-product variance kernels fed to AvxVarianceTest.
//
// In every entry the leading pair equals log2 of the WxH in the kernel name
// (e.g. (7, 6) goes with 128x64). Unlike the high-bitdepth tables, no
// trailing bit-depth argument is passed here.
//
// Each block size is listed exactly once: a second copy of the 64x64 entry
// that used to follow 128x128 was dropped, since it only re-registered and
// re-ran the same kernel under a different instance index.
4168 const VarianceParams kArrayVariance_neon_dotprod[] = {
4169 VarianceParams(7, 7, &aom_variance128x128_neon_dotprod),
4171 VarianceParams(7, 6, &aom_variance128x64_neon_dotprod),
4172 VarianceParams(6, 7, &aom_variance64x128_neon_dotprod),
4173 VarianceParams(6, 6, &aom_variance64x64_neon_dotprod),
4174 VarianceParams(6, 5, &aom_variance64x32_neon_dotprod),
4175 VarianceParams(5, 6, &aom_variance32x64_neon_dotprod),
4176 VarianceParams(5, 5, &aom_variance32x32_neon_dotprod),
4177 VarianceParams(5, 4, &aom_variance32x16_neon_dotprod),
4178 VarianceParams(4, 5, &aom_variance16x32_neon_dotprod),
4179 VarianceParams(4, 4, &aom_variance16x16_neon_dotprod),
4180 VarianceParams(4, 3, &aom_variance16x8_neon_dotprod),
4181 VarianceParams(3, 4, &aom_variance8x16_neon_dotprod),
4182 VarianceParams(3, 3, &aom_variance8x8_neon_dotprod),
4183 VarianceParams(3, 2, &aom_variance8x4_neon_dotprod),
4184 VarianceParams(2, 3, &aom_variance4x8_neon_dotprod),
4185 VarianceParams(2, 2, &aom_variance4x4_neon_dotprod),
// The 4x16, 16x4, 8x32, 32x8, 16x64 and 64x16 kernels are listed only when
// CONFIG_REALTIME_ONLY is 0.
4186 #if !CONFIG_REALTIME_ONLY
4187 VarianceParams(2, 4, &aom_variance4x16_neon_dotprod),
4188 VarianceParams(4, 2, &aom_variance16x4_neon_dotprod),
4189 VarianceParams(3, 5, &aom_variance8x32_neon_dotprod),
4190 VarianceParams(5, 3, &aom_variance32x8_neon_dotprod),
4191 VarianceParams(4, 6, &aom_variance16x64_neon_dotprod),
4192 VarianceParams(6, 4, &aom_variance64x16_neon_dotprod),
4193 #endif
4194 };
4195
// Register the table with AvxVarianceTest under the "NEON_DOTPROD" prefix.
4196 INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, AvxVarianceTest,
4197 ::testing::ValuesIn(kArrayVariance_neon_dotprod));
4198
// Parameter table for GetSseSum8x8QuadTest. Every entry passes the same
// kernel, aom_get_var_sse_sum_8x8_quad_neon_dotprod, and a trailing 0; only
// the leading pair differs: (7,7), (6,6), (5,5), (5,4).
// NOTE(review): the leading pair is presumably log2(width), log2(height) of
// the tested area and the trailing 0 the bit depth -- confirm against the
// GetSseSumParams definition, which is outside this section.
4199 const GetSseSumParams kArrayGetSseSum8x8Quad_neon_dotprod[] = {
4200 GetSseSumParams(7, 7, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4201 GetSseSumParams(6, 6, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4202 GetSseSumParams(5, 5, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0),
4203 GetSseSumParams(5, 4, &aom_get_var_sse_sum_8x8_quad_neon_dotprod, 0)
4204 };
// Register the table with GetSseSum8x8QuadTest under the "NEON_DOTPROD"
// prefix.
4205 INSTANTIATE_TEST_SUITE_P(
4206 NEON_DOTPROD, GetSseSum8x8QuadTest,
4207 ::testing::ValuesIn(kArrayGetSseSum8x8Quad_neon_dotprod));
4208
4209 const GetSseSumParamsDual kArrayGetSseSum16x16Dual_neon_dotprod[] = {
4210 GetSseSumParamsDual(7, 7, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4211 GetSseSumParamsDual(6, 6, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4212 GetSseSumParamsDual(5, 5, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0),
4213 GetSseSumParamsDual(5, 4, &aom_get_var_sse_sum_16x16_dual_neon_dotprod, 0)
4214 };
4215 INSTANTIATE_TEST_SUITE_P(
4216 NEON_DOTPROD, GetSseSum16x16DualTest,
4217 ::testing::ValuesIn(kArrayGetSseSum16x16Dual_neon_dotprod));
4218
4219 INSTANTIATE_TEST_SUITE_P(
4220 NEON_DOTPROD, AvxMseTest,
4221 ::testing::Values(MseParams(3, 3, &aom_mse8x8_neon_dotprod),
4222 MseParams(3, 4, &aom_mse8x16_neon_dotprod),
4223 MseParams(4, 4, &aom_mse16x16_neon_dotprod),
4224 MseParams(4, 3, &aom_mse16x8_neon_dotprod)));
4225
4226 #endif // HAVE_NEON_DOTPROD
4227
4228 #if HAVE_SVE
4229
4230 #if CONFIG_AV1_HIGHBITDEPTH
4231 const VarianceParams kArrayHBDVariance_sve[] = {
4232 VarianceParams(7, 7, &aom_highbd_12_variance128x128_sve, 12),
4233 VarianceParams(7, 6, &aom_highbd_12_variance128x64_sve, 12),
4234 VarianceParams(6, 7, &aom_highbd_12_variance64x128_sve, 12),
4235 VarianceParams(6, 6, &aom_highbd_12_variance64x64_sve, 12),
4236 VarianceParams(6, 5, &aom_highbd_12_variance64x32_sve, 12),
4237 VarianceParams(5, 6, &aom_highbd_12_variance32x64_sve, 12),
4238 VarianceParams(5, 5, &aom_highbd_12_variance32x32_sve, 12),
4239 VarianceParams(5, 4, &aom_highbd_12_variance32x16_sve, 12),
4240 VarianceParams(4, 5, &aom_highbd_12_variance16x32_sve, 12),
4241 VarianceParams(4, 4, &aom_highbd_12_variance16x16_sve, 12),
4242 VarianceParams(4, 3, &aom_highbd_12_variance16x8_sve, 12),
4243 VarianceParams(3, 4, &aom_highbd_12_variance8x16_sve, 12),
4244 VarianceParams(3, 3, &aom_highbd_12_variance8x8_sve, 12),
4245 VarianceParams(3, 2, &aom_highbd_12_variance8x4_sve, 12),
4246 VarianceParams(2, 3, &aom_highbd_12_variance4x8_sve, 12),
4247 VarianceParams(2, 2, &aom_highbd_12_variance4x4_sve, 12),
4248 VarianceParams(7, 7, &aom_highbd_10_variance128x128_sve, 10),
4249 VarianceParams(7, 6, &aom_highbd_10_variance128x64_sve, 10),
4250 VarianceParams(6, 7, &aom_highbd_10_variance64x128_sve, 10),
4251 VarianceParams(6, 6, &aom_highbd_10_variance64x64_sve, 10),
4252 VarianceParams(6, 5, &aom_highbd_10_variance64x32_sve, 10),
4253 VarianceParams(5, 6, &aom_highbd_10_variance32x64_sve, 10),
4254 VarianceParams(5, 5, &aom_highbd_10_variance32x32_sve, 10),
4255 VarianceParams(5, 4, &aom_highbd_10_variance32x16_sve, 10),
4256 VarianceParams(4, 5, &aom_highbd_10_variance16x32_sve, 10),
4257 VarianceParams(4, 4, &aom_highbd_10_variance16x16_sve, 10),
4258 VarianceParams(4, 3, &aom_highbd_10_variance16x8_sve, 10),
4259 VarianceParams(3, 4, &aom_highbd_10_variance8x16_sve, 10),
4260 VarianceParams(3, 3, &aom_highbd_10_variance8x8_sve, 10),
4261 VarianceParams(3, 2, &aom_highbd_10_variance8x4_sve, 10),
4262 VarianceParams(2, 3, &aom_highbd_10_variance4x8_sve, 10),
4263 VarianceParams(2, 2, &aom_highbd_10_variance4x4_sve, 10),
4264 VarianceParams(7, 7, &aom_highbd_8_variance128x128_sve, 8),
4265 VarianceParams(7, 6, &aom_highbd_8_variance128x64_sve, 8),
4266 VarianceParams(6, 7, &aom_highbd_8_variance64x128_sve, 8),
4267 VarianceParams(6, 6, &aom_highbd_8_variance64x64_sve, 8),
4268 VarianceParams(6, 5, &aom_highbd_8_variance64x32_sve, 8),
4269 VarianceParams(5, 6, &aom_highbd_8_variance32x64_sve, 8),
4270 VarianceParams(5, 5, &aom_highbd_8_variance32x32_sve, 8),
4271 VarianceParams(5, 4, &aom_highbd_8_variance32x16_sve, 8),
4272 VarianceParams(4, 5, &aom_highbd_8_variance16x32_sve, 8),
4273 VarianceParams(4, 4, &aom_highbd_8_variance16x16_sve, 8),
4274 VarianceParams(4, 3, &aom_highbd_8_variance16x8_sve, 8),
4275 VarianceParams(3, 4, &aom_highbd_8_variance8x16_sve, 8),
4276 VarianceParams(3, 3, &aom_highbd_8_variance8x8_sve, 8),
4277 VarianceParams(3, 2, &aom_highbd_8_variance8x4_sve, 8),
4278 VarianceParams(2, 3, &aom_highbd_8_variance4x8_sve, 8),
4279 VarianceParams(2, 2, &aom_highbd_8_variance4x4_sve, 8),
4280 #if !CONFIG_REALTIME_ONLY
4281 VarianceParams(6, 4, &aom_highbd_12_variance64x16_sve, 12),
4282 VarianceParams(4, 6, &aom_highbd_12_variance16x64_sve, 12),
4283 VarianceParams(5, 3, &aom_highbd_12_variance32x8_sve, 12),
4284 VarianceParams(3, 5, &aom_highbd_12_variance8x32_sve, 12),
4285 VarianceParams(4, 2, &aom_highbd_12_variance16x4_sve, 12),
4286 VarianceParams(2, 4, &aom_highbd_12_variance4x16_sve, 12),
4287 VarianceParams(6, 4, &aom_highbd_10_variance64x16_sve, 10),
4288 VarianceParams(4, 6, &aom_highbd_10_variance16x64_sve, 10),
4289 VarianceParams(5, 3, &aom_highbd_10_variance32x8_sve, 10),
4290 VarianceParams(3, 5, &aom_highbd_10_variance8x32_sve, 10),
4291 VarianceParams(4, 2, &aom_highbd_10_variance16x4_sve, 10),
4292 VarianceParams(2, 4, &aom_highbd_10_variance4x16_sve, 10),
4293 VarianceParams(6, 4, &aom_highbd_8_variance64x16_sve, 8),
4294 VarianceParams(4, 6, &aom_highbd_8_variance16x64_sve, 8),
4295 VarianceParams(5, 3, &aom_highbd_8_variance32x8_sve, 8),
4296 VarianceParams(3, 5, &aom_highbd_8_variance8x32_sve, 8),
4297 VarianceParams(4, 2, &aom_highbd_8_variance16x4_sve, 8),
4298 VarianceParams(2, 4, &aom_highbd_8_variance4x16_sve, 8),
4299 #endif
4300 };
4301
4302 INSTANTIATE_TEST_SUITE_P(SVE, AvxHBDVarianceTest,
4303 ::testing::ValuesIn(kArrayHBDVariance_sve));
4304
4305 #endif // CONFIG_AV1_HIGHBITDEPTH
4306 #endif // HAVE_SVE
4307
4308 } // namespace
4309