1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
#include <assert.h>
#include <string.h>
12
13 #include "third_party/googletest/src/include/gtest/gtest.h"
14
15 #include "./vp9_rtcd.h"
16 #include "./vpx_config.h"
17 #include "./vpx_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_filter.h"
24 #include "vpx_dsp/vpx_dsp_common.h"
25 #include "vpx_dsp/vpx_filter.h"
26 #include "vpx_mem/vpx_mem.h"
27 #include "vpx_ports/mem.h"
28 #include "vpx_ports/vpx_timer.h"
29
30 namespace {
31
// Largest block width/height handled by the reference filters in this file;
// it sizes their scratch buffers and bounds the block sizes under test.
static const unsigned int kMaxDimension = 64;

// Signature shared by all convolve functions under test: a strided source
// block, a strided destination block, the horizontal and vertical filter
// kernels with their step arguments, and the block width and height.
typedef void (*ConvolveFunc)(const uint8_t *src,
                             ptrdiff_t src_stride,
                             uint8_t *dst,
                             ptrdiff_t dst_stride,
                             const int16_t *filter_x,
                             int filter_x_stride,
                             const int16_t *filter_y,
                             int filter_y_stride,
                             int w,
                             int h);

// Signature of the reference-filter wrappers defined below. use_highbd is 0
// for 8-bit data and the bit depth otherwise.
typedef void (*WrapperFilterBlock2d8Func)(const uint8_t *src_ptr,
                                          const unsigned int src_stride,
                                          const int16_t *hfilter,
                                          const int16_t *vfilter,
                                          uint8_t *dst_ptr,
                                          unsigned int dst_stride,
                                          unsigned int output_width,
                                          unsigned int output_height,
                                          int use_highbd);
45
46 struct ConvolveFunctions {
ConvolveFunctions__anond4a532260111::ConvolveFunctions47 ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
48 ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
49 ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
50 ConvolveFunc sh8_avg, ConvolveFunc sv8,
51 ConvolveFunc sv8_avg, ConvolveFunc shv8,
52 ConvolveFunc shv8_avg, int bd)
53 : use_highbd_(bd) {
54 copy_[0] = copy;
55 copy_[1] = avg;
56 h8_[0] = h8;
57 h8_[1] = h8_avg;
58 v8_[0] = v8;
59 v8_[1] = v8_avg;
60 hv8_[0] = hv8;
61 hv8_[1] = hv8_avg;
62 sh8_[0] = sh8;
63 sh8_[1] = sh8_avg;
64 sv8_[0] = sv8;
65 sv8_[1] = sv8_avg;
66 shv8_[0] = shv8;
67 shv8_[1] = shv8_avg;
68 }
69
70 ConvolveFunc copy_[2];
71 ConvolveFunc h8_[2];
72 ConvolveFunc v8_[2];
73 ConvolveFunc hv8_[2];
74 ConvolveFunc sh8_[2]; // scaled horiz
75 ConvolveFunc sv8_[2]; // scaled vert
76 ConvolveFunc shv8_[2]; // scaled horiz/vert
77 int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth.
78 };
79
80 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
81
82 #define ALL_SIZES(convolve_fn) \
83 make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \
84 make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \
85 make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \
86 make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
87 make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
88 make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
89 make_tuple(64, 64, &convolve_fn)
90
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// The filter taps are fixed-point weights that sum to VP9_FILTER_WEIGHT, so
// filtered sums are scaled back by shifting right VP9_FILTER_SHIFT bits.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7

// Clamps x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
95
filter_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)96 void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
97 const int16_t *hfilter, const int16_t *vfilter,
98 uint8_t *dst_ptr, unsigned int dst_stride,
99 unsigned int output_width, unsigned int output_height) {
100 // Between passes, we use an intermediate buffer whose height is extended to
101 // have enough horizontally filtered values as input for the vertical pass.
102 // This buffer is allocated to be big enough for the largest block type we
103 // support.
104 const int kInterp_Extend = 4;
105 const unsigned int intermediate_height =
106 (kInterp_Extend - 1) + output_height + kInterp_Extend;
107 unsigned int i, j;
108
109 // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
110 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
111 // + kInterp_Extend
112 // = 3 + 16 + 4
113 // = 23
114 // and filter_max_width = 16
115 //
116 uint8_t intermediate_buffer[71 * kMaxDimension];
117 const int intermediate_next_stride =
118 1 - static_cast<int>(intermediate_height * output_width);
119
120 // Horizontal pass (src -> transposed intermediate).
121 uint8_t *output_ptr = intermediate_buffer;
122 const int src_next_row_stride = src_stride - output_width;
123 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
124 for (i = 0; i < intermediate_height; ++i) {
125 for (j = 0; j < output_width; ++j) {
126 // Apply filter...
127 const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
128 (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
129 (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
130 (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
131 (VP9_FILTER_WEIGHT >> 1); // Rounding
132
133 // Normalize back to 0-255...
134 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
135 ++src_ptr;
136 output_ptr += intermediate_height;
137 }
138 src_ptr += src_next_row_stride;
139 output_ptr += intermediate_next_stride;
140 }
141
142 // Vertical pass (transposed intermediate -> dst).
143 src_ptr = intermediate_buffer;
144 const int dst_next_row_stride = dst_stride - output_width;
145 for (i = 0; i < output_height; ++i) {
146 for (j = 0; j < output_width; ++j) {
147 // Apply filter...
148 const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
149 (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
150 (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
151 (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
152 (VP9_FILTER_WEIGHT >> 1); // Rounding
153
154 // Normalize back to 0-255...
155 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
156 src_ptr += intermediate_height;
157 }
158 src_ptr += intermediate_next_stride;
159 dst_ptr += dst_next_row_stride;
160 }
161 }
162
// Averages src into output_ptr in place: every pixel of the
// output_width x output_height block becomes (output + src + 1) >> 1, i.e.
// the mean of the two rounded half up.
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint8_t *const src_row = src + row * src_stride;
    uint8_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = out_row[col] + src_row[col] + 1;
      out_row[col] = static_cast<uint8_t>(sum >> 1);
    }
  }
}
174
filter_average_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)175 void filter_average_block2d_8_c(const uint8_t *src_ptr,
176 const unsigned int src_stride,
177 const int16_t *hfilter, const int16_t *vfilter,
178 uint8_t *dst_ptr, unsigned int dst_stride,
179 unsigned int output_width,
180 unsigned int output_height) {
181 uint8_t tmp[kMaxDimension * kMaxDimension];
182
183 assert(output_width <= kMaxDimension);
184 assert(output_height <= kMaxDimension);
185 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
186 output_width, output_height);
187 block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
188 }
189
190 #if CONFIG_VP9_HIGHBITDEPTH
highbd_filter_block2d_8_c(const uint16_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)191 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
192 const unsigned int src_stride,
193 const int16_t *hfilter, const int16_t *vfilter,
194 uint16_t *dst_ptr, unsigned int dst_stride,
195 unsigned int output_width,
196 unsigned int output_height, int bd) {
197 // Between passes, we use an intermediate buffer whose height is extended to
198 // have enough horizontally filtered values as input for the vertical pass.
199 // This buffer is allocated to be big enough for the largest block type we
200 // support.
201 const int kInterp_Extend = 4;
202 const unsigned int intermediate_height =
203 (kInterp_Extend - 1) + output_height + kInterp_Extend;
204
205 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
206 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
207 * + kInterp_Extend
208 * = 3 + 16 + 4
209 * = 23
210 * and filter_max_width = 16
211 */
212 uint16_t intermediate_buffer[71 * kMaxDimension];
213 const int intermediate_next_stride =
214 1 - static_cast<int>(intermediate_height * output_width);
215
216 // Horizontal pass (src -> transposed intermediate).
217 {
218 uint16_t *output_ptr = intermediate_buffer;
219 const int src_next_row_stride = src_stride - output_width;
220 unsigned int i, j;
221 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
222 for (i = 0; i < intermediate_height; ++i) {
223 for (j = 0; j < output_width; ++j) {
224 // Apply filter...
225 const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
226 (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
227 (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
228 (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
229 (VP9_FILTER_WEIGHT >> 1); // Rounding
230
231 // Normalize back to 0-255...
232 *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
233 ++src_ptr;
234 output_ptr += intermediate_height;
235 }
236 src_ptr += src_next_row_stride;
237 output_ptr += intermediate_next_stride;
238 }
239 }
240
241 // Vertical pass (transposed intermediate -> dst).
242 {
243 uint16_t *src_ptr = intermediate_buffer;
244 const int dst_next_row_stride = dst_stride - output_width;
245 unsigned int i, j;
246 for (i = 0; i < output_height; ++i) {
247 for (j = 0; j < output_width; ++j) {
248 // Apply filter...
249 const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
250 (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
251 (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
252 (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
253 (VP9_FILTER_WEIGHT >> 1); // Rounding
254
255 // Normalize back to 0-255...
256 *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
257 src_ptr += intermediate_height;
258 }
259 src_ptr += intermediate_next_stride;
260 dst_ptr += dst_next_row_stride;
261 }
262 }
263 }
264
// 16-bit counterpart of block2d_average_c(): every pixel of the
// output_width x output_height block at output_ptr becomes
// (output + src + 1) >> 1, i.e. the mean of the two rounded half up.
void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    const uint16_t *const src_row = src + row * src_stride;
    uint16_t *const out_row = output_ptr + row * output_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      const int sum = out_row[col] + src_row[col] + 1;
      out_row[col] = static_cast<uint16_t>(sum >> 1);
    }
  }
}
277
highbd_filter_average_block2d_8_c(const uint16_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)278 void highbd_filter_average_block2d_8_c(
279 const uint16_t *src_ptr, const unsigned int src_stride,
280 const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
281 unsigned int dst_stride, unsigned int output_width,
282 unsigned int output_height, int bd) {
283 uint16_t tmp[kMaxDimension * kMaxDimension];
284
285 assert(output_width <= kMaxDimension);
286 assert(output_height <= kMaxDimension);
287 highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
288 output_width, output_height, bd);
289 highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
290 output_height);
291 }
292 #endif // CONFIG_VP9_HIGHBITDEPTH
293
wrapper_filter_average_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int use_highbd)294 void wrapper_filter_average_block2d_8_c(
295 const uint8_t *src_ptr, const unsigned int src_stride,
296 const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
297 unsigned int dst_stride, unsigned int output_width,
298 unsigned int output_height, int use_highbd) {
299 #if CONFIG_VP9_HIGHBITDEPTH
300 if (use_highbd == 0) {
301 filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
302 dst_stride, output_width, output_height);
303 } else {
304 highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
305 hfilter, vfilter,
306 CAST_TO_SHORTPTR(dst_ptr), dst_stride,
307 output_width, output_height, use_highbd);
308 }
309 #else
310 ASSERT_EQ(0, use_highbd);
311 filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
312 dst_stride, output_width, output_height);
313 #endif
314 }
315
wrapper_filter_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int use_highbd)316 void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
317 const unsigned int src_stride,
318 const int16_t *hfilter, const int16_t *vfilter,
319 uint8_t *dst_ptr, unsigned int dst_stride,
320 unsigned int output_width,
321 unsigned int output_height, int use_highbd) {
322 #if CONFIG_VP9_HIGHBITDEPTH
323 if (use_highbd == 0) {
324 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
325 dst_stride, output_width, output_height);
326 } else {
327 highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
328 vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
329 output_width, output_height, use_highbd);
330 }
331 #else
332 ASSERT_EQ(0, use_highbd);
333 filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
334 output_width, output_height);
335 #endif
336 }
337
338 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
339 public:
SetUpTestCase()340 static void SetUpTestCase() {
341 // Force input_ to be unaligned, output to be 16 byte aligned.
342 input_ = reinterpret_cast<uint8_t *>(
343 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) +
344 1;
345 output_ = reinterpret_cast<uint8_t *>(
346 vpx_memalign(kDataAlignment, kOutputBufferSize));
347 output_ref_ = reinterpret_cast<uint8_t *>(
348 vpx_memalign(kDataAlignment, kOutputBufferSize));
349 #if CONFIG_VP9_HIGHBITDEPTH
350 input16_ = reinterpret_cast<uint16_t *>(vpx_memalign(
351 kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
352 1;
353 output16_ = reinterpret_cast<uint16_t *>(
354 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
355 output16_ref_ = reinterpret_cast<uint16_t *>(
356 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
357 #endif
358 }
359
TearDown()360 virtual void TearDown() { libvpx_test::ClearSystemState(); }
361
TearDownTestCase()362 static void TearDownTestCase() {
363 vpx_free(input_ - 1);
364 input_ = NULL;
365 vpx_free(output_);
366 output_ = NULL;
367 vpx_free(output_ref_);
368 output_ref_ = NULL;
369 #if CONFIG_VP9_HIGHBITDEPTH
370 vpx_free(input16_ - 1);
371 input16_ = NULL;
372 vpx_free(output16_);
373 output16_ = NULL;
374 vpx_free(output16_ref_);
375 output16_ref_ = NULL;
376 #endif
377 }
378
379 protected:
380 static const int kDataAlignment = 16;
381 static const int kOuterBlockSize = 256;
382 static const int kInputStride = kOuterBlockSize;
383 static const int kOutputStride = kOuterBlockSize;
384 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
385 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
386
Width() const387 int Width() const { return GET_PARAM(0); }
Height() const388 int Height() const { return GET_PARAM(1); }
BorderLeft() const389 int BorderLeft() const {
390 const int center = (kOuterBlockSize - Width()) / 2;
391 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
392 }
BorderTop() const393 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
394
IsIndexInBorder(int i)395 bool IsIndexInBorder(int i) {
396 return (i < BorderTop() * kOuterBlockSize ||
397 i >= (BorderTop() + Height()) * kOuterBlockSize ||
398 i % kOuterBlockSize < BorderLeft() ||
399 i % kOuterBlockSize >= (BorderLeft() + Width()));
400 }
401
SetUp()402 virtual void SetUp() {
403 UUT_ = GET_PARAM(2);
404 #if CONFIG_VP9_HIGHBITDEPTH
405 if (UUT_->use_highbd_ != 0) {
406 mask_ = (1 << UUT_->use_highbd_) - 1;
407 } else {
408 mask_ = 255;
409 }
410 #endif
411 /* Set up guard blocks for an inner block centered in the outer block */
412 for (int i = 0; i < kOutputBufferSize; ++i) {
413 if (IsIndexInBorder(i)) {
414 output_[i] = 255;
415 } else {
416 output_[i] = 0;
417 }
418 }
419
420 ::libvpx_test::ACMRandom prng;
421 for (int i = 0; i < kInputBufferSize; ++i) {
422 if (i & 1) {
423 input_[i] = 255;
424 #if CONFIG_VP9_HIGHBITDEPTH
425 input16_[i] = mask_;
426 #endif
427 } else {
428 input_[i] = prng.Rand8Extremes();
429 #if CONFIG_VP9_HIGHBITDEPTH
430 input16_[i] = prng.Rand16() & mask_;
431 #endif
432 }
433 }
434 }
435
SetConstantInput(int value)436 void SetConstantInput(int value) {
437 memset(input_, value, kInputBufferSize);
438 #if CONFIG_VP9_HIGHBITDEPTH
439 vpx_memset16(input16_, value, kInputBufferSize);
440 #endif
441 }
442
CopyOutputToRef()443 void CopyOutputToRef() {
444 memcpy(output_ref_, output_, kOutputBufferSize);
445 #if CONFIG_VP9_HIGHBITDEPTH
446 memcpy(output16_ref_, output16_,
447 kOutputBufferSize * sizeof(output16_ref_[0]));
448 #endif
449 }
450
CheckGuardBlocks()451 void CheckGuardBlocks() {
452 for (int i = 0; i < kOutputBufferSize; ++i) {
453 if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
454 }
455 }
456
input() const457 uint8_t *input() const {
458 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
459 #if CONFIG_VP9_HIGHBITDEPTH
460 if (UUT_->use_highbd_ == 0) {
461 return input_ + offset;
462 } else {
463 return CAST_TO_BYTEPTR(input16_ + offset);
464 }
465 #else
466 return input_ + offset;
467 #endif
468 }
469
output() const470 uint8_t *output() const {
471 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
472 #if CONFIG_VP9_HIGHBITDEPTH
473 if (UUT_->use_highbd_ == 0) {
474 return output_ + offset;
475 } else {
476 return CAST_TO_BYTEPTR(output16_ + offset);
477 }
478 #else
479 return output_ + offset;
480 #endif
481 }
482
output_ref() const483 uint8_t *output_ref() const {
484 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
485 #if CONFIG_VP9_HIGHBITDEPTH
486 if (UUT_->use_highbd_ == 0) {
487 return output_ref_ + offset;
488 } else {
489 return CAST_TO_BYTEPTR(output16_ref_ + offset);
490 }
491 #else
492 return output_ref_ + offset;
493 #endif
494 }
495
lookup(uint8_t * list,int index) const496 uint16_t lookup(uint8_t *list, int index) const {
497 #if CONFIG_VP9_HIGHBITDEPTH
498 if (UUT_->use_highbd_ == 0) {
499 return list[index];
500 } else {
501 return CAST_TO_SHORTPTR(list)[index];
502 }
503 #else
504 return list[index];
505 #endif
506 }
507
assign_val(uint8_t * list,int index,uint16_t val) const508 void assign_val(uint8_t *list, int index, uint16_t val) const {
509 #if CONFIG_VP9_HIGHBITDEPTH
510 if (UUT_->use_highbd_ == 0) {
511 list[index] = (uint8_t)val;
512 } else {
513 CAST_TO_SHORTPTR(list)[index] = val;
514 }
515 #else
516 list[index] = (uint8_t)val;
517 #endif
518 }
519
520 const ConvolveFunctions *UUT_;
521 static uint8_t *input_;
522 static uint8_t *output_;
523 static uint8_t *output_ref_;
524 #if CONFIG_VP9_HIGHBITDEPTH
525 static uint16_t *input16_;
526 static uint16_t *output16_;
527 static uint16_t *output16_ref_;
528 int mask_;
529 #endif
530 };
531
532 uint8_t *ConvolveTest::input_ = NULL;
533 uint8_t *ConvolveTest::output_ = NULL;
534 uint8_t *ConvolveTest::output_ref_ = NULL;
535 #if CONFIG_VP9_HIGHBITDEPTH
536 uint16_t *ConvolveTest::input16_ = NULL;
537 uint16_t *ConvolveTest::output16_ = NULL;
538 uint16_t *ConvolveTest::output16_ref_ = NULL;
539 #endif
540
// Checks the guard area right after SetUp(), before any convolve function
// has been called.
TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
}
542
// Benchmark (disabled by default): times kNumTests calls of the plain copy
// function for the current block size and prints the elapsed microseconds.
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const int kNumTests = 5000000;
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int remaining = kNumTests; remaining > 0; --remaining) {
    UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  // use_highbd_ is 0 for 8-bit functions; report those as bit depth 8.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve_copy_%dx%d_%d: %d us\n", width, height, bit_depth,
         elapsed_time);
}
562
// Benchmark (disabled by default): times kNumTests calls of the averaging
// copy function for the current block size and prints the elapsed
// microseconds.
TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const int kNumTests = 5000000;
  const uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int remaining = kNumTests; remaining > 0; --remaining) {
    UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  // use_highbd_ is 0 for 8-bit functions; report those as bit depth 8.
  const int bit_depth = UUT_->use_highbd_ ? UUT_->use_highbd_ : 8;
  printf("convolve_avg_%dx%d_%d: %d us\n", width, height, bit_depth,
         elapsed_time);
}
582
// The plain copy function must reproduce the input block exactly and leave
// the guard area untouched.
TEST_P(ConvolveTest, Copy) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
                                          NULL, 0, NULL, 0, width, height));

  CheckGuardBlocks();

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}
599
// The averaging copy function must store the rounded mean of the input pixel
// and the previous output pixel, and leave the guard area untouched.
TEST_P(ConvolveTest, Avg) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  uint8_t *const out_ref = output_ref();
  const int width = Width();
  const int height = Height();

  // Keep the pre-call output so the expected average can be computed.
  CopyOutputToRef();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
                                          NULL, 0, NULL, 0, width, height));

  CheckGuardBlocks();

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
                                       lookup(out_ref, y * kOutputStride + x),
                                   1))
          << "(" << x << "," << y << ")";
    }
  }
}
620
// With a kernel whose only non-zero tap is 128 at index 3, the scaled
// horizontal function must behave like a copy.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
                                         filter8, 16, filter8, 16, width,
                                         height));

  CheckGuardBlocks();

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}
640
// With a kernel whose only non-zero tap is 128 at index 3, the scaled
// vertical function must behave like a copy.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
                                         filter8, 16, filter8, 16, width,
                                         height));

  CheckGuardBlocks();

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}
660
// With a kernel whose only non-zero tap is 128 at index 3 in both
// directions, the scaled 2-D function must behave like a copy.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t *const in = input();
  uint8_t *const out = output();
  const int width = Width();
  const int height = Height();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
                                          filter8, 16, filter8, 16, width,
                                          height));

  CheckGuardBlocks();

  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      ASSERT_EQ(lookup(out, y * kOutputStride + x),
                lookup(in, y * kInputStride + x))
          << "(" << x << "," << y << ")";
    }
  }
}
680
681 const int kNumFilterBanks = 4;
682 const int kNumFilters = 16;
683
TEST(ConvolveTest,FiltersWontSaturateWhenAddedPairwise)684 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
685 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
686 const InterpKernel *filters =
687 vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
688 for (int i = 0; i < kNumFilters; i++) {
689 const int p0 = filters[i][0] + filters[i][1];
690 const int p1 = filters[i][2] + filters[i][3];
691 const int p2 = filters[i][4] + filters[i][5];
692 const int p3 = filters[i][6] + filters[i][7];
693 EXPECT_LE(p0, 128);
694 EXPECT_LE(p1, 128);
695 EXPECT_LE(p2, 128);
696 EXPECT_LE(p3, 128);
697 EXPECT_LE(p0 + p3, 128);
698 EXPECT_LE(p0 + p3 + p1, 128);
699 EXPECT_LE(p0 + p3 + p1 + p2, 128);
700 EXPECT_EQ(p0 + p1 + p2 + p3, 128);
701 }
702 }
703 }
704
705 const int16_t kInvalidFilter[8] = { 0 };
706 const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
707 wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
708 };
709
// Compares the functions under test against the C reference filters for
// every kernel pair of every filter bank, first for the plain variants and
// then for the averaging variants.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CAST_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // i == 0 exercises the plain functions, i == 1 the averaging ones.
  for (int i = 0; i < 2; ++i) {
    // Populate ref and out with some random data
    ::libvpx_test::ACMRandom prng;
    for (int y = 0; y < Height(); ++y) {
      for (int x = 0; x < Width(); ++x) {
        uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
          r = prng.Rand8Extremes();
        } else {
          r = prng.Rand16() & mask_;
        }
#else
        r = prng.Rand8Extremes();
#endif

        assign_val(out, y * kOutputStride + x, r);
        assign_val(ref, y * kOutputStride + x, r);
      }
    }

    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpKernel *filters =
          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
                                      filters[filter_y], ref, kOutputStride,
                                      Width(), Height(), UUT_->use_highbd_);

          // Kernel index 0 means "no filtering in that direction": choose
          // the function that filters only in the remaining direction(s)
          // and hand it the invalid kernel for the unused one.
          const bool use_h = filter_x != 0;
          const bool use_v = filter_y != 0;
          ConvolveFunc convolve;
          if (use_h && use_v) {
            convolve = UUT_->hv8_[i];
          } else if (use_v) {
            convolve = UUT_->v8_[i];
          } else if (use_h) {
            convolve = UUT_->h8_[i];
          } else {
            convolve = UUT_->copy_[i];
          }
          const int16_t *const kernel_x =
              use_h ? filters[filter_x] : kInvalidFilter;
          const int16_t *const kernel_y =
              use_v ? filters[filter_y] : kInvalidFilter;
          // The copy functions are called with a step of 0, all others 16.
          const int step = (use_h || use_v) ? 16 : 0;

          ASM_REGISTER_STATE_CHECK(convolve(in, kInputStride, out,
                                            kOutputStride, kernel_x, step,
                                            kernel_y, step, Width(),
                                            Height()));

          CheckGuardBlocks();

          for (int y = 0; y < Height(); ++y) {
            for (int x = 0; x < Width(); ++x) {
              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                        lookup(out, y * kOutputStride + x))
                  << "mismatch at (" << x << "," << y << "), "
                  << "filters (" << filter_bank << "," << filter_x << ","
                  << filter_y << ")";
            }
          }
        }
      }
    }
  }
}
789
// Feeds the plain functions with inputs made only of the minimum and maximum
// pixel values, arranged in every 8-bit pattern along rows (axis 0) and
// along columns (axis 1), and compares the result with the C reference
// filter for every kernel pair of every filter bank.
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CAST_TO_BYTEPTR(ref16);
  }
  // Largest pixel value for the bit depth under test.
  const int max_pixel = mask_;
#else
  uint8_t ref[kOutputStride * kMaxDimension];
  const int max_pixel = 255;
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      // Write an 8x8 pattern of extreme values starting
      // SUBPEL_TAPS / 2 - 1 pixels to the left of the block. Each pixel is
      // 0 or max_pixel depending on one bit of seed_val; seed_val itself is
      // advanced while the pattern is written, so the statement order below
      // matters.
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
          // `in` addresses the input buffer, hence kInputStride.
          assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * max_pixel);
          if (axis) seed_val++;
        }
        if (axis) {
          seed_val -= 8;
        } else {
          seed_val++;
        }
      }
      if (axis) seed_val += 8;

      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
                                       Width(), Height(), UUT_->use_highbd_);
            // Kernel index 0 means "no filtering in that direction".
            if (filter_x && filter_y) {
              ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  filters[filter_y], 16, Width(), Height()));
            } else if (filter_y) {
              ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                  filters[filter_y], 16, Width(), Height()));
            } else if (filter_x) {
              ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  kInvalidFilter, 16, Width(), Height()));
            } else {
              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                  kInvalidFilter, 0, Width(), Height()));
            }

            for (int y = 0; y < Height(); ++y) {
              for (int x = 0; x < Width(); ++x) {
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << "," << filter_x << ","
                    << filter_y << ")";
              }
            }
          }
        }
      }
    }
  }
}
886
/* This test checks that enough rows and columns are filtered for every
   possible initial fractional position and scaling step. */
TEST_P(ConvolveTest, CheckScalingFiltering) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  const InterpKernel *const kernels = vp9_filter_kernels[EIGHTTAP];

  // Every comparison below is made against the input buffer as populated by
  // SetConstantInput(127).
  SetConstantInput(127);

  // Sweep kernel indices 0..15 of the EIGHTTAP bank and step values 1..32.
  // The same kernel and the same step are passed for both x and y.
  for (int kernel_idx = 0; kernel_idx < 16; ++kernel_idx) {
    const int16_t *const kernel = kernels[kernel_idx];

    for (int step_size = 1; step_size <= 32; ++step_size) {
      // Exercise the horizontal and vertical filters together through the
      // shv8_ entry of the function table.
      ASM_REGISTER_STATE_CHECK(
          UUT_->shv8_[0](src, kInputStride, dst, kOutputStride, kernel,
                         step_size, kernel, step_size, Width(), Height()));

      CheckGuardBlocks();

      // Each output sample must equal the input sample at the same (x, y).
      for (int row = 0; row < Height(); ++row) {
        for (int col = 0; col < Width(); ++col) {
          ASSERT_EQ(lookup(src, row * kInputStride + col),
                    lookup(dst, row * kOutputStride + col))
              << "x == " << col << ", y == " << row
              << ", frac == " << kernel_idx << ", step == " << step_size;
        }
      }
    }
  }
}
916
917 using std::tr1::make_tuple;
918
919 #if CONFIG_VP9_HIGHBITDEPTH
// WRAP(func, bd) defines wrap_<func>_<bd>(), a function with the ConvolveFunc
// parameter list that reinterprets the uint8_t source/destination pointers as
// uint16_t pointers and forwards every argument, followed by the literal bit
// depth bd, to vpx_highbd_<func>().
#define WRAP(func, bd)                                                        \
  void wrap_##func##_##bd(                                                    \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                 \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,     \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {           \
    const uint16_t *const src_hbd = reinterpret_cast<const uint16_t *>(src);  \
    uint16_t *const dst_hbd = reinterpret_cast<uint16_t *>(dst);              \
    vpx_highbd_##func(src_hbd, src_stride, dst_hbd, dst_stride, filter_x,     \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd);  \
  }
929
#if HAVE_SSE2 && ARCH_X86_64
// wrap_<func>_<bd> adapters for the SSE2 high-bitdepth functions, grouped by
// bit depth.

// bd == 8
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)

// bd == 10
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)

// bd == 12
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64
956
#if HAVE_AVX2
// wrap_<func>_<bd> adapters for the AVX2 high-bitdepth functions. Within each
// bit depth the order is: copy, avg, horiz, avg_horiz, vert, avg_vert, 2-D,
// avg 2-D.

// bd == 8
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

// bd == 10
WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_avg_avx2, 10)

// bd == 12
WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
#endif  // HAVE_AVX2
985
#if HAVE_NEON
// wrap_<func>_<bd> adapters for the NEON high-bitdepth functions, grouped by
// bit depth.

// bd == 8
WRAP(convolve_copy_neon, 8)
WRAP(convolve_avg_neon, 8)
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_avg_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)
WRAP(convolve8_avg_vert_neon, 8)
WRAP(convolve8_neon, 8)
WRAP(convolve8_avg_neon, 8)

// bd == 10
WRAP(convolve_copy_neon, 10)
WRAP(convolve_avg_neon, 10)
WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_avg_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)
WRAP(convolve8_avg_vert_neon, 10)
WRAP(convolve8_neon, 10)
WRAP(convolve8_avg_neon, 10)

// bd == 12
WRAP(convolve_copy_neon, 12)
WRAP(convolve_avg_neon, 12)
WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_avg_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
WRAP(convolve8_avg_vert_neon, 12)
WRAP(convolve8_neon, 12)
WRAP(convolve8_avg_neon, 12)
#endif  // HAVE_NEON
1012
1013 WRAP(convolve_copy_c, 8)
1014 WRAP(convolve_avg_c, 8)
1015 WRAP(convolve8_horiz_c, 8)
1016 WRAP(convolve8_avg_horiz_c, 8)
1017 WRAP(convolve8_vert_c, 8)
1018 WRAP(convolve8_avg_vert_c, 8)
1019 WRAP(convolve8_c, 8)
1020 WRAP(convolve8_avg_c, 8)
1021 WRAP(convolve_copy_c, 10)
1022 WRAP(convolve_avg_c, 10)
1023 WRAP(convolve8_horiz_c, 10)
1024 WRAP(convolve8_avg_horiz_c, 10)
1025 WRAP(convolve8_vert_c, 10)
1026 WRAP(convolve8_avg_vert_c, 10)
1027 WRAP(convolve8_c, 10)
1028 WRAP(convolve8_avg_c, 10)
1029 WRAP(convolve_copy_c, 12)
1030 WRAP(convolve_avg_c, 12)
1031 WRAP(convolve8_horiz_c, 12)
1032 WRAP(convolve8_avg_horiz_c, 12)
1033 WRAP(convolve8_vert_c, 12)
1034 WRAP(convolve8_avg_vert_c, 12)
1035 WRAP(convolve8_c, 12)
1036 WRAP(convolve8_avg_c, 12)
1037 #undef WRAP
1038
1039 const ConvolveFunctions convolve8_c(
1040 wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
1041 wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
1042 wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
1043 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
1044 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
1045 wrap_convolve8_avg_c_8, 8);
1046 const ConvolveFunctions convolve10_c(
1047 wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
1048 wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
1049 wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
1050 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
1051 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
1052 wrap_convolve8_avg_c_10, 10);
1053 const ConvolveFunctions convolve12_c(
1054 wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
1055 wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
1056 wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
1057 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
1058 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
1059 wrap_convolve8_avg_c_12, 12);
1060 const ConvolveParam kArrayConvolve_c[] = {
1061 ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
1062 };
1063
1064 #else
1065 const ConvolveFunctions convolve8_c(
1066 vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c,
1067 vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
1068 vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c,
1069 vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
1070 vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
1071 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
1072 #endif
1073 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
1074
#if HAVE_SSE2 && ARCH_X86_64
// SSE2 function tables. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth: the sh8/sv8/shv8 slots (and their avg variants) receive the
// same SSE2 wrappers as the h8/v8/hv8 slots.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    8);

const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    10);

const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    12);

const ConvolveParam kArrayConvolve_sse2[] = {
  ALL_SIZES(convolve8_sse2),
  ALL_SIZES(convolve10_sse2),
  ALL_SIZES(convolve12_sse2)
};
#else
// Without high bitdepth: SSE2 functions for the first eight slots, the
// vpx_scaled_*_c functions for the remaining six, and bd == 0.
const ConvolveFunctions convolve8_sse2(
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2,
    vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2,
    vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2,
    vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve_sse2[] = {
  ALL_SIZES(convolve8_sse2)
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Either branch above defines kArrayConvolve_sse2.
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif  // HAVE_SSE2 && ARCH_X86_64
1117
#if HAVE_SSSE3
// SSSE3 function table. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
// copy/avg use the _c functions, the six 8-tap slots use the _ssse3
// functions, and of the remaining slots only shv8 (vpx_scaled_2d_ssse3) is
// not a _c function.
const ConvolveFunctions convolve8_ssse3(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3,
    vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3,
    vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve8_ssse3[] = {
  ALL_SIZES(convolve8_ssse3)
};

INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif  // HAVE_SSSE3
1130
#if HAVE_AVX2
// AVX2 function tables. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth: AVX2 wrappers for the first eight slots; the sh8/sv8/shv8
// slots (and their avg variants) receive the C wrappers.
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    8);

const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    10);

const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    12);

const ConvolveParam kArrayConvolve8_avx2[] = {
  ALL_SIZES(convolve8_avx2),
  ALL_SIZES(convolve10_avx2),
  ALL_SIZES(convolve12_avx2)
};
#else  // !CONFIG_VP9_HIGHBITDEPTH
// Without high bitdepth: copy/avg use the _c functions, the non-averaging
// 8-tap slots use the _avx2 functions, the averaging 8-tap slots use the
// _ssse3 functions, and the last six slots use the vpx_scaled_*_c functions.
const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c,
    vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_ssse3,
    vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_ssse3,
    vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve8_avx2[] = {
  ALL_SIZES(convolve8_avx2)
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Either branch above defines kArrayConvolve8_avx2, so the instantiation is
// written once here.
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // HAVE_AVX2
1173
#if HAVE_NEON
// NEON function tables. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
#if CONFIG_VP9_HIGHBITDEPTH
// High bitdepth: the sh8/sv8/shv8 slots (and their avg variants) receive the
// same NEON wrappers as the h8/v8/hv8 slots.
const ConvolveFunctions convolve8_neon(
    wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
    8);

const ConvolveFunctions convolve10_neon(
    wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
    10);

const ConvolveFunctions convolve12_neon(
    wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
    12);

const ConvolveParam kArrayConvolve_neon[] = {
  ALL_SIZES(convolve8_neon),
  ALL_SIZES(convolve10_neon),
  ALL_SIZES(convolve12_neon)
};
#else
// Without high bitdepth: NEON functions for the first eight slots, the
// vpx_scaled_*_c functions for the remaining six, and bd == 0.
const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon,
    vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon,
    vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon,
    vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve_neon[] = {
  ALL_SIZES(convolve8_neon)
};
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Either branch above defines kArrayConvolve_neon.
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_neon));
#endif  // HAVE_NEON
1216
#if HAVE_DSPR2
// DSPR2 function table. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
// The first eight slots use the _dspr2 functions; the last six use the
// vpx_scaled_*_c functions.
const ConvolveFunctions convolve8_dspr2(
    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2,
    vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2,
    vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2,
    vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve8_dspr2[] = {
  ALL_SIZES(convolve8_dspr2)
};

INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2
1229
#if HAVE_MSA
// MSA function table. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
// The first eight slots use the _msa functions; the last six use the
// vpx_scaled_*_c functions.
const ConvolveFunctions convolve8_msa(
    vpx_convolve_copy_msa, vpx_convolve_avg_msa,
    vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa,
    vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa,
    vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve8_msa[] = {
  ALL_SIZES(convolve8_msa)
};

INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA
1242
#if HAVE_VSX
// VSX function table. Constructor argument order: copy, avg, h8, h8_avg,
// v8, v8_avg, hv8, hv8_avg, sh8, sh8_avg, sv8, sv8_avg, shv8, shv8_avg, bd.
// The first eight slots use the _vsx functions; the last six use the
// vpx_scaled_*_c functions.
const ConvolveFunctions convolve8_vsx(
    vpx_convolve_copy_vsx, vpx_convolve_avg_vsx,
    vpx_convolve8_horiz_vsx, vpx_convolve8_avg_horiz_vsx,
    vpx_convolve8_vert_vsx, vpx_convolve8_avg_vert_vsx,
    vpx_convolve8_vsx, vpx_convolve8_avg_vsx,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c,
    vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c,
    0);

const ConvolveParam kArrayConvolve_vsx[] = {
  ALL_SIZES(convolve8_vsx)
};

INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
#endif  // HAVE_VSX
1254 } // namespace
1255